This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Should include all data files:
library(dplyr)
library(tibble)
library(tidyr)
library(GGally)
library(corrplot)
library(vegan)
library(pvclust)
library(mgcv)
library(knitr)
library(kableExtra)
library(viridis)
library(ggpubr)
library(rcompanion)
library(vegan)
library(cluster)
library(readxl)
library(geosphere)
library(dendextend)
library(ggrepel)
library(FactoMineR)
library(factoextra)
library(purrr)
library(stringr)
library(ggforce)
library(ggridges)
library(forcats)
library(indicspecies)
library(betapart)
library(plotly)
library(htmltools)
library(lubridate)
#remotes::install_github("MikkoVihtakari/ggOceanMaps")
library(ggOceanMaps)
#expanded data - DATASET A
data.a <- read.csv("./expanded_data_CR.csv", sep = ",", header=TRUE) #extracted from raw data where counts are translated into rows - This is also called Dataset A
# wide data - DATASET B
data.b <- read.csv("./wide_data_CR.csv", sep = ",", header=TRUE) #this file is wide data plus all the env data that I extracted manually - This is also called Dataset B, and will become dataset C when aggregated by site
#Load taxa sheet file - TAXA
taxa <- read.csv("./taxa_sheet_all_CR.csv", sep = ",", header=TRUE) #all morphotypes and their metadata
#Load the forest data
garden_length_df <- read.csv("./forest_data_CR.csv", sep = ",", header=TRUE)
Before starting any data manipulation, getting some data summaries and basic statistics
jump_threshold <- 100 # max allowed segment in meters
smoothing_spar <- 0.7
# Process data: smoothing, segment distances, split by jumps. This also account for the 3D distance, so it takes vertical/depth distance into account!
process_dive <- function(dive_df) {
dive_df <- dive_df %>%
filter(!is.na(time), !is.na(lat), !is.na(lon), !is.na(depth)) %>%
arrange(time) %>%
mutate(
time = as.POSIXct(time, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"),
time_sec = as.numeric(difftime(time, min(time), units = "secs")),
lat_smooth = predict(smooth.spline(time_sec, lat, spar = smoothing_spar), time_sec)$y,
lon_smooth = predict(smooth.spline(time_sec, lon, spar = smoothing_spar), time_sec)$y
) %>%
mutate(
segment_distance_horizontal = c(0, distHaversine(
cbind(lon_smooth[-n()], lat_smooth[-n()]),
cbind(lon_smooth[-1], lat_smooth[-1])
)),
segment_distance_vertical = c(0, abs(depth[-1] - depth[-n()])), #remove this if you dont want 3D! This uses the Pythagoras function, to get the distance.
segment_distance_m = sqrt(segment_distance_horizontal^2 + segment_distance_vertical^2), #remove this if you dont want 3D!
track_segment = cumsum(c(FALSE, segment_distance_m[-1] > jump_threshold)) + 1
) %>%
group_by(track_segment) %>%
mutate(cum_distance_m = cumsum(segment_distance_m)) %>%
ungroup()
}
# Apply to all dives
data.segmented <- data.a %>%
split(.$dive_name) %>%
lapply(process_dive) %>%
bind_rows()
summary_table <- data.segmented %>%
group_by(dive_name) %>%
summarise(
total_track_length_m = sum(segment_distance_m, na.rm = TRUE), # this excludes jumps
min_depth = min(depth, na.rm = TRUE),
max_depth = max(depth, na.rm = TRUE),
mean_depth = mean(depth, na.rm = TRUE),
n_points = n(),
n_segments = n_distinct(track_segment)
)
jump_distances <- data.segmented %>%
group_by(dive_name) %>%
arrange(time) %>%
summarise(
jump_dist_m = sum(
distHaversine(
cbind(lon_smooth[-n()], lat_smooth[-n()]),
cbind(lon_smooth[-1], lat_smooth[-1])
) * (track_segment[-1] != track_segment[-n()])
, na.rm=TRUE)
)
corrected_summary <- summary_table %>%
left_join(jump_distances, by = "dive_name") %>%
mutate(
jump_dist_m = ifelse(is.na(jump_dist_m), 0, jump_dist_m),
total_track_length_no_jumps = total_track_length_m - jump_dist_m
)
corrected_summary
## # A tibble: 9 × 9
## dive_name total_track_length_m min_depth max_depth mean_depth n_points
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 EX2104_Dive03 Ho… 534. 2375. 2641. 2492. 672
## 2 EX2104_Dive04 Du… 579. 2256. 2414. 2369. 601
## 3 EX2104_Dive05 Ro… 328. 4113. 4189. 4156. 53
## 4 EX2104_Dive06 Ca… 553. 2084. 2332. 2174. 275
## 5 EX2104_Dive07 Co… 354. 2430. 2587. 2494. 317
## 6 EX2104_Dive08 Ma… 929. 940. 1274. 1022. 741
## 7 EX2104_Dive09 Ya… 560. 1194. 1368. 1269. 261
## 8 EX2104_Dive10 Ya… 607. 1699. 1984. 1841. 215
## 9 EX2104_Dive11 Ca… 574. 1203. 1250. 1228. 1520
## # ℹ 3 more variables: n_segments <int>, jump_dist_m <dbl>,
## # total_track_length_no_jumps <dbl>
# Depth profile plot
ggplot(data.segmented, aes(cum_distance_m, depth, group = interaction(dive_name, track_segment), color = dive_name)) +
geom_line() + scale_y_reverse() + theme_minimal() +
labs(title = "Depth Profile Along Track", x = "Cumulative Distance (m)", y = "Depth (m)")
# 3D plot with segments
plot_list <- list()
for (dive_name in unique(data.segmented$dive_name)) {
dive_df <- filter(data.segmented, dive_name == !!dive_name)
p <- plot_ly()
# Original track
p <- p %>% add_trace(
data = dive_df, x = ~lon, y = ~lat, z = ~depth,
type = 'scatter3d', mode = 'lines',
line = list(color = 'rgba(100,255,255,0.4)', width = 1),
name = 'Original'
)
# Each cleaned segment
for (seg_id in unique(dive_df$track_segment)) {
seg_df <- filter(dive_df, track_segment == seg_id)
p <- p %>% add_trace(
data = seg_df,
x = ~lon_smooth, y = ~lat_smooth, z = ~depth,
type = 'scatter3d', mode = 'lines',
line = list(width = 7, color = seg_df$depth, colorscale = 'Viridis'),
hoverinfo = 'text',
text = ~paste("Depth:", round(depth,1), "m<br>Dist:", round(cum_distance_m), "m"),
showlegend = FALSE
)
}
p <- p %>%
layout(
title = list(text = paste("3D Track (Dive", dive_name, ")"), font = list(color = "white")),
scene = list(
xaxis = list(title = "Longitude", titlefont=list(color="white"), tickfont=list(color="white")),
yaxis = list(title = "Latitude", titlefont=list(color="white"), tickfont=list(color="white")),
zaxis = list(title = "Depth (m)", titlefont=list(color="white"), tickfont=list(color="white"), autorange = "reversed"),
bgcolor = "#000000"
),
paper_bgcolor = '#000000', plot_bgcolor = '#000000'
)
plot_list[[dive_name]] <- p
}
# Output all plots together in the markdown
tagList(plot_list)
Here I calculate the basics, like how many taxa, how many observations etc. This uses the full expanded dataset (Dataset A) and the taxa sheet.
#How many total morphotypes?
nrow(taxa)
## [1] 112
#How many total coral morphotypes?
sum(taxa$Phylum == "Cnidaria")
## [1] 60
#How many total sponge morphotypes?
sum(taxa$Phylum == "Porifera")
## [1] 52
#How many total observations?
nrow(data.a) #counting all rows of the Dataset A
## [1] 4655
#How many coral observations?
sum(data.a$Phylum == "Cnidaria")
## [1] 2240
#How many sponge observations?
sum(data.a$Phylum == "Porifera")
## [1] 2415
#How many coral observations of species level?
sum(data.a$Phylum == "Cnidaria" & data.a$Species != "")
## [1] 1030
#How many sponge observations of species level?
sum(data.a$Phylum == "Porifera" & data.a$Species != "")
## [1] 0
#How many (and which) morphotypes occurred in a single dive only?
# First, aggregate the data to count the occurrences of each morphotype across all dives
morphotype_counts <- data.a %>%
group_by(Morphotype) %>%
summarise(count = n_distinct(dive_name))
# Filter the aggregated data to include only morphotypes that occurred in a single dive
morphotypes_single_dive <- morphotype_counts %>%
filter(count == 1) #change number for what to check
# count the number of morphotypes that occurred in a single dive
num_single_dive_morphotypes <- nrow(morphotypes_single_dive)
# Print the number of morphotypes that occurred in a single dive
print(num_single_dive_morphotypes)
## [1] 48
# Frequency of this number:
(100/(length(unique(data.a$Morphotype))))*num_single_dive_morphotypes
## [1] 42.85714
# Can print the morphotypes that occurred in a single dive only:
#knitr::kable(morphotypes_single_dive, caption = "Morphotypes that occures in a single dive location only")
Now I want to plot density (number of counts) by depth and divided for each phylum. Since some depths have been visited several times, I am scaling the counts by the length (in meters) of each dive (from transect duration/length above), which results in an effort-corrected count. This way there is no bias on the density, e.g. if more time spent in 1000 m because there were 2 dives. The plot also gives an overview of the highest abundances, and how the abundances change over depth.
### Effort correct based in transect lengths! Different sampling efforts, so need to acount for that (or at least check) Transect length instead of time, as it might be biased by time spend somewhere like a zoom or going up column again ....
# count occurrences per depth and Phylum
df_summary <- data.a %>%
group_by(depth, Phylum, dive_name) %>%
summarise(count = n(), .groups = "drop")
# Merge dive track length with df_summary
df_corrected <- df_summary %>%
left_join(corrected_summary, by = "dive_name") %>%
mutate(transect_m = as.numeric(total_track_length_no_jumps), #thats in meters, accounts for the off=bottom time
Effort_Corrected_count = count / transect_m) # Normalize counts by meter spend during time
# Plot density with effort correction
ggplot(df_corrected, aes(x = depth, fill = Phylum, weight = Effort_Corrected_count)) +
geom_density(alpha = 0.7) + # Density plot with transparency
scale_fill_manual(values = c("Cnidaria" = "#d8514e", "Porifera" = "#f2c714")) + # Custom colors
scale_x_reverse(breaks=seq(800, 5000, 200)) + # Flip depth axis so shallowest is on top
coord_flip() +
labs(x = "depth (m)", y = "Effort-Corrected Density", fill = "Phylum") +
theme_minimal()
#for paper
# Plot density with effort correction
ggplot(df_corrected, aes(x = depth, fill = Phylum, weight = Effort_Corrected_count)) +
geom_density(alpha = 0.7) + # Density plot with transparency
scale_fill_manual(values = c("Cnidaria" = "#d8514e", "Porifera" = "#f2c714")) + # Custom colors
labs(x = "depth (m)", y = "Effort-Corrected Density", fill = "Phylum") +
theme_minimal()
#plot without effort correction
ggplot(data.a, aes(x = depth, fill = Phylum)) +
geom_density(alpha = 0.7) + # Adjust transparency
scale_fill_manual(values = c("Cnidaria" = "#d8514e", "Porifera" = "#f2c714")) +
labs(x = "depth (m)", y = "Density", fill = "Phylum") +
theme_minimal() +
coord_flip() +
scale_x_reverse(breaks=seq(800, 5000, 200))
# Bin the df into depth for a test on the abundances by depth - bins in 5 meter, but can be adapted.
df_corrected <- df_corrected %>%
mutate(depth_bin = cut(depth, breaks = seq(0, max(depth), by = 5), include.lowest = TRUE)) %>%
mutate(depth_bin = as.factor(depth_bin))
# Is there a linear effect?
glm_model <- glm(Effort_Corrected_count ~ depth, data = df_corrected, family = quasipoisson) # need to use quasipoisson because its non-integer counts with the correction
summary(glm_model) # No
##
## Call:
## glm(formula = Effort_Corrected_count ~ depth, family = quasipoisson,
## data = df_corrected)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.500e+00 1.580e-01 -28.484 < 2e-16 ***
## depth -3.859e-04 8.528e-05 -4.525 6.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.02411202)
##
## Null deviance: 15.614 on 1659 degrees of freedom
## Residual deviance: 15.101 on 1658 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 7
glm_numeric <- glm(Effort_Corrected_count ~ as.numeric(depth), data = df_corrected, family = quasipoisson)
summary(glm_numeric) # No
##
## Call:
## glm(formula = Effort_Corrected_count ~ as.numeric(depth), family = quasipoisson,
## data = df_corrected)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.500e+00 1.580e-01 -28.484 < 2e-16 ***
## as.numeric(depth) -3.859e-04 8.528e-05 -4.525 6.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 0.02411202)
##
## Null deviance: 15.614 on 1659 degrees of freedom
## Residual deviance: 15.101 on 1658 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 7
# Is there a non-linear effect?
gam_model <- gam(Effort_Corrected_count ~ s(depth), data = df_corrected, family = quasipoisson)
summary(gam_model) # yes, significant, meaning that there are peaks with higher abundances, but no linear decline.
##
## Family: quasipoisson
## Link function: log
##
## Formula:
## Effort_Corrected_count ~ s(depth)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.31207 0.04907 -108.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(depth) 8.243 8.828 13.66 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0471 Deviance explained = 13.5%
## GCV = 0.0082257 Scale est. = 0.017892 n = 1660
plot(gam_model)
# Is there an overall depth effect?
anova(glm_model, test = "Chisq") # no
## Analysis of Deviance Table
##
## Model: quasipoisson, link: log
##
## Response: Effort_Corrected_count
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 1659 15.614
## depth 1 0.51327 1658 15.101 3.954e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# And if we use the non-corrected data?
glm_model <- glm(count ~ depth, data = df_summary, family = poisson)
summary(glm_model) # Poisson - this is significant, but might be biased because of the sampling effort
##
## Call:
## glm(formula = count ~ depth, family = poisson, data = df_summary)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.834e+00 4.493e-02 40.83 <2e-16 ***
## depth -4.429e-04 2.454e-05 -18.05 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 7722.7 on 1659 degrees of freedom
## Residual deviance: 7381.7 on 1658 degrees of freedom
## AIC: 11416
##
## Number of Fisher Scoring iterations: 6
In geom_density(), the density represents a smoothed estimate of the distribution of observations across depth. It’s similar to a histogram but smoothed into a continuous curve. The uncorrected plot could over-represent depths where more time was spent during each dive, also overlapping dives. The effort-corrected plot gives a better idea of relative abundance per unit effort, making it more comparable across depths.
The statistical tests on depth effect on abundances shows a complex situation. But there is not a linear trend, that is abundances are not decreasing with depth, but that there are peaks. From the GAM model there are three large peaks (this could also be because of the sites…), but the middle peak is the lowest.
Continuing with morphotypes per depth, as overview of how many morphotyes were seen across the depth.
# count unique morphotypes per depth for each phylum
morpho_diversity <- data.a %>%
group_by(depth, Phylum) %>% # by each depth now, but can also be done with depth bin, replacing than depth with depth_bin
summarise(Unique_Morphotypes = n_distinct(Morphotype), .groups = "drop")
df_corrected <- df_summary %>%
left_join(corrected_summary, by = "dive_name") %>%
mutate(transect_m = as.numeric(total_track_length_no_jumps), #thats in meters, accounts for the off=bottom time
Effort_Corrected_count = count / transect_m)
# Step 2: Merge with df_corrected (Ensuring Consistency)
df_corrected <- df_corrected %>%
left_join(morpho_diversity, by = c("depth", "Phylum"))
df_corrected <- df_corrected %>%
mutate(Effort_Corrected_Div = Unique_Morphotypes / transect_m) # Normalize counts by meters
ggplot(df_corrected, aes(x = depth, fill = Phylum, weight = Effort_Corrected_count)) +
#geom_density(alpha = 0.7) + # Density plot with transparency
geom_smooth(aes(y = Effort_Corrected_Div, color = Phylum), method = "loess", se = TRUE) +
scale_fill_manual(values = c("Cnidaria" = "#d8514e", "Porifera" = "#f2c714")) + # Custom color
#coord_flip() +
labs(x = "depth (m)", y = "Effort-Corrected Diversity", fill = "Phylum") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#scale_x_reverse(breaks=seq(800, 5000, 200))
Next another density plot / ridgleine plot for representation by dive sites
#Assign clusters - this will be explained later in the script. But can be used here in the plots already ...
cluster_assignments <- data.frame(
dive_name = c(
"EX2104_Dive03 Hopscotch Seamount",
"EX2104_Dive04 Dumbbell Seamount",
"EX2104_Dive05 Rockaway Seamount",
"EX2104_Dive06 Castle Rock Seamount",
"EX2104_Dive07 Corner Rise 1 Seamount",
"EX2104_Dive08 MacGregor Seamount",
"EX2104_Dive09 Yakutat Shallow Seamount",
"EX2104_Dive10 Yakutat Deep Seamount",
"EX2104_Dive11 Caloosahatchee Seamount"
),
cluster = c("cluster1", "cluster1", "deep", "cluster1", "cluster1", "cluster2", "cluster2", "cluster2", "cluster2")) # This is based on the upcoming analysis, but I wanted to include it here.
# Optional: filter just Cnidaria and Porifera
plot_df <- data.a %>%
filter(Phylum %in% c("Cnidaria", "Porifera"))
# Order locations by mean depth
location_order <- plot_df %>%
group_by(dive_name) %>%
summarise(mean_depth = mean(depth, na.rm = TRUE)) %>%
arrange(mean_depth) %>%
pull(dive_name)
# Apply order to factor
plot_df$dive_name <- factor(plot_df$dive_name, levels = location_order)
# Merge cluster info
plot_df <- data.a %>%
filter(Phylum %in% c("Cnidaria", "Porifera")) %>%
left_join(cluster_assignments, by = c("dive_name" = "dive_name"))
# Order locations by mean depth
location_order <- plot_df %>%
group_by(dive_name) %>%
summarise(mean_depth = mean(depth, na.rm = TRUE)) %>%
arrange(mean_depth) %>%
pull(dive_name)
plot_df$dive_name <- factor(plot_df$dive_name, levels = location_order)
plot_df$cluster <- factor(plot_df$cluster) # ensure cluster is a factor
ggplot(plot_df, aes(x = depth, y = dive_name, fill = Phylum)) +
geom_density_ridges(
aes(group = interaction(dive_name, Phylum)),
alpha = 0.6,
scale = 2, # Lowered from 3.5 for broader peaks
rel_min_height = 0.01,
color = "white",
size = 0.3
) +
#scale_x_reverse(expand = expansion(mult = c(0.01, 0.05))) + # More space on x-axis
scale_fill_manual(values = c("Cnidaria" = "#0072B2", "Porifera" = "#D55E00")) +
facet_grid(cluster ~ ., scales = "free_y", space = "free_y") + # Stack by cluster
labs(
x = "Depth (m)",
y = NULL,
title = "Depth Distributions of Coral and Sponge Annotations by Dive",
fill = "Phylum"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "top",
strip.text.y = element_text(angle = 0, face = "bold"),
panel.grid.major.y = element_blank(),
axis.text.y = element_text(size = 10)
)
## Warning in geom_density_ridges(aes(group = interaction(dive_name, Phylum)), :
## Ignoring unknown parameters: `size`
## Picking joint bandwidth of 12.4
## Picking joint bandwidth of 19.7
## Picking joint bandwidth of 11.8
#add morphotype richness
# Calculate morphotype richness per dive and phylum (total across all depths)
richness_summary <- plot_df %>%
group_by(dive_name, Phylum) %>%
summarise(morphotype_richness = n_distinct(Morphotype), .groups = "drop")
richness_summary$dive_name <- factor(richness_summary$dive_name, levels = location_order)
# Ridgeline plot of abundance/density (your original plot style)
p <- ggplot(plot_df, aes(x = depth, y = dive_name, fill = Phylum)) +
geom_density_ridges(
aes(group = interaction(dive_name, Phylum)),
alpha = 0.6,
scale = 2,
rel_min_height = 0.01,
color = "white",
size = 0.3
) +
scale_fill_manual(values = c("Cnidaria" = "#0072B2", "Porifera" = "#D55E00")) +
#scale_x_reverse(expand = expansion(mult = c(0.01, 0.05))) +
labs(
x = "Depth (m)",
y = NULL,
title = "Depth Distributions of Coral and Sponge Annotations by Dive",
fill = "Phylum"
) +
theme_minimal(base_size = 14) +
theme(
legend.position = "top",
panel.grid.major.y = element_blank(),
axis.text.y = element_text(size = 10)
)
## Warning in geom_density_ridges(aes(group = interaction(dive_name, Phylum)), :
## Ignoring unknown parameters: `size`
# Add morphotype richness dots - position dots just right of the max depth (e.g., at x=10 m shallower than min depth)
min_depth <- min(plot_df$depth)
richness_summary <- richness_summary %>%
mutate(xpos = min_depth - 10) # Place dots left of x-axis range (shallower)
p +
geom_point(
data = richness_summary,
aes(x = xpos-250, y = dive_name, color = Phylum, size = morphotype_richness, alpha=0.7),
position = position_nudge(y = ifelse(richness_summary$Phylum == "Cnidaria", 0.20, -0.20)),
inherit.aes = FALSE
) +
scale_color_manual(values = c("Cnidaria" = "#0072B2", "Porifera" = "#D55E00")) +
scale_size_continuous(range = c(0.5, 7)) +
guides(
size = guide_legend(title = "Morphotype Richness"),
color = "none"
)
## Picking joint bandwidth of 15.6
#I need to add dive_name to data.b
data.a <- data.a %>% mutate(time = as.POSIXct(time, format = "%Y-%m-%d %H:%M:%S", tz = "UTC"))
data.b <- data.b %>% mutate(time = as.POSIXct(time, format = "%Y/%m/%d %H:%M:%S", tz = "UTC"))
## Extract metadata for each dive - use dataset B
meta_summary <- data.b %>%
group_by(dive_name) %>%
summarise(across(c(lat, lon, oxygen, depth, temp),
list(mean = ~mean(.x, na.rm=TRUE), sd = ~sd(.x, na.rm=TRUE))))
kable(meta_summary) %>%
kable_styling(font_size = 5, latex_options = c("striped"))
| dive_name | lat_mean | lat_sd | lon_mean | lon_sd | oxygen_mean | oxygen_sd | depth_mean | depth_sd | temp_mean | temp_sd |
|---|---|---|---|---|---|---|---|---|---|---|
| EX2104_Dive03 Hopscotch Seamount | 33.67625 | 0.0001247 | -52.99308 | 0.0008432 | 8.073393 | 0.0259140 | 2497.180 | 55.95509 | 3.266768 | 0.0583540 |
| EX2104_Dive04 Dumbbell Seamount Seamount | 34.39127 | 0.0003023 | -51.77360 | 0.0009397 | 8.161735 | 0.0349583 | 2356.338 | 42.44092 | 3.377768 | 0.0567185 |
| EX2104_Dive05 Rockaway Seamount | 35.81771 | 0.0007215 | -52.30680 | 0.0006939 | 7.677247 | 0.1869162 | 4164.104 | 21.77646 | 2.245520 | 0.0121140 |
| EX2104_Dive06 Castle Rock Seamount | 36.30088 | 0.0000915 | -51.34954 | 0.0010866 | 8.166332 | 0.0362416 | 2176.312 | 59.72247 | 3.740780 | 0.0591794 |
| EX2104_Dive07 Corner Rise 1 Seamount | 35.88859 | 0.0001977 | -51.52052 | 0.0002568 | 8.151222 | 0.0238003 | 2497.275 | 40.74897 | 3.438871 | 0.0449929 |
| EX2104_Dive08 MacGregor Seamount | 35.05412 | 0.0011218 | -48.97419 | 0.0025228 | 7.998123 | 0.1418473 | 1075.383 | 141.17785 | 4.876061 | 0.2106476 |
| EX2104_Dive09 Yakutat Shallow Seamount | 35.17906 | 0.0007692 | -48.11690 | 0.0002508 | 8.270112 | 0.0274373 | 1273.898 | 53.62311 | 4.338314 | 0.0590956 |
| EX2104_Dive10 Yakutat Deep Seamount | 35.26362 | 0.0012008 | -48.00214 | 0.0003997 | 8.194902 | 0.0608428 | 1843.528 | 108.20987 | 3.864164 | 0.0733001 |
| EX2104_Dive11 Caloosahatchee Seamount | 34.65083 | 0.0004038 | -49.65224 | 0.0008947 | 8.014477 | 0.1321868 | 1228.175 | 13.88984 | 4.835937 | 0.1868237 |
# What are max/min values for depth, oxygen etc?
max(data.a$depth)
## [1] 4188.628
min(data.a$depth)
## [1] 940.1595
max(data.a$oxygen)
## [1] 8.3427
min(data.a$oxygen)
## [1] 7.4742
# For Atlantic: Threshold values for OMZ: <2.1 mg/L or hypoxic <0.7 mg/L
omz <- 2.1 # 2.1 is the threshold for mg/L
hypoxic <- 0.7 # 0.7 is the threshold for a severe hypoxic condition in mg/L
# Which depths are inside the OMZ?
within_omz <- data.a %>% filter(oxygen < omz)
min(within_omz$depth)
## Warning in min(within_omz$depth): no non-missing arguments to min; returning
## Inf
## [1] Inf
max(within_omz$depth)
## Warning in max(within_omz$depth): no non-missing arguments to max; returning
## -Inf
## [1] -Inf
#No values within OMZ range
#Also check medium by all dives:
substrate_percent <- data.a %>%
group_by(dive_name, medium) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(dive_name) %>%
mutate(percent = 100 * count / sum(count))
substrate_percent
## # A tibble: 23 × 4
## # Groups: dive_name [9]
## dive_name medium count percent
## <chr> <chr> <int> <dbl>
## 1 EX2104_Dive03 Hopscotch Seamount bedrock 662 98.5
## 2 EX2104_Dive03 Hopscotch Seamount gravel 6 0.893
## 3 EX2104_Dive03 Hopscotch Seamount outcrop 4 0.595
## 4 EX2104_Dive04 Dumbbell Seamount bedrock 547 91.0
## 5 EX2104_Dive04 Dumbbell Seamount outcrop 54 8.99
## 6 EX2104_Dive05 Rockaway Seamount bedrock 41 77.4
## 7 EX2104_Dive05 Rockaway Seamount gravel 8 15.1
## 8 EX2104_Dive05 Rockaway Seamount outcrop 4 7.55
## 9 EX2104_Dive06 Castle Rock Seamount bedrock 248 90.2
## 10 EX2104_Dive06 Castle Rock Seamount outcrop 27 9.82
## # ℹ 13 more rows
Check if any of the environmental variables are correlated, this is important for the analysis, and can be used to exclude variables, simplifying the further analysis.
# Compute Pearson correlation matrix
env.pearson <- cor(data.a[,5:13], use = "pairwise.complete.obs")
round(env.pearson, 2)
## lat pitch lon roll altitude heading oxygen temp depth
## lat 1.00 0.24 0.40 0.23 0.17 0.17 0.09 0.14 -0.10
## pitch 0.24 1.00 -0.04 0.33 0.26 -0.18 0.01 -0.04 0.05
## lon 0.40 -0.04 1.00 -0.22 -0.21 0.51 -0.12 0.79 -0.84
## roll 0.23 0.33 -0.22 1.00 0.35 -0.34 0.08 -0.29 0.29
## altitude 0.17 0.26 -0.21 0.35 1.00 -0.28 0.19 -0.28 0.29
## heading 0.17 -0.18 0.51 -0.34 -0.28 1.00 -0.22 0.65 -0.62
## oxygen 0.09 0.01 -0.12 0.08 0.19 -0.22 1.00 -0.47 0.29
## temp 0.14 -0.04 0.79 -0.29 -0.28 0.65 -0.47 1.00 -0.97
## depth -0.10 0.05 -0.84 0.29 0.29 -0.62 0.29 -0.97 1.00
# Pairplot with correlation coefficients
ggpairs(data.a[,5:13],
lower = list(continuous = wrap("smooth", color = "blue")), # Smoothed regression lines
upper = list(continuous = wrap("cor", size = 5)), # Pearson correlation values
diag = list(continuous = wrap("barDiag", fill = "lightblue"))) + # Histogram on diagonals
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Correlation plot
corrplot(env.pearson, method = "circle", type = "upper", tl.col = "black", tl.cex = 0.8, addCoef.col = "black")
# Create the plot 1) TEMP vs Oxgyen
ggplot(data.a, aes(y = temp, x = oxygen, color = depth)) +
geom_point(size = 1) + # Scatter plot with color representing depth
#scale_color_gradient(name = "depth (m)", low = "blue", high = "red") + # Color gradient
scale_color_viridis(option = "viridis", direction = -1) + # Color scale with cividis for colorblind-friendly option
#scale_y_reverse() + # Reverse y-axis to put shallow depth on top
xlab("Oxygen (mg/L)") + # X-axis label
ylab("Temperature (°C)") + # Left y-axis label
theme_linedraw() +
theme(panel.grid = element_blank(),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10),
axis.title.x = element_text (size = 14),
axis.title.y = element_text(size = 14),
legend.background = element_blank(),
legend.box.background= element_rect(colour="black"),
legend.position = "right")
# Create the plot 2) Salinity vs Temperature
ggplot(data.a, aes(y = temp, x = psu, color = depth)) +
geom_point(size = 1) + # Scatter plot with color representing depth
#scale_color_gradient(name = "depth (m)", low = "blue", high = "red") + # Color gradient
scale_color_viridis(option = "viridis", direction = -1) + # Color scale with cividis for colorblind-friendly option
#scale_y_reverse() + # Reverse y-axis to put shallow depth on top
xlab("Salinity (psu)") + # X-axis label
ylab("Temperature (°C)") + # Left y-axis label
theme_linedraw() +
theme(panel.grid = element_blank(),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10),
axis.title.x = element_text (size = 14),
axis.title.y = element_text(size = 14),
legend.background = element_blank(),
legend.box.background= element_rect(colour="black"),
legend.position = "right")
Now extract individual columns, this is needed for the next steps
#Extract individual df for coordinates, species data and env data
#Also add the substrate from data.a
medium <- data.a %>%
group_by(time, medium) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(time) %>%
slice_max(count, n = 1, with_ties = FALSE) %>%
select(time, medium)
data.b <- data.b %>%
left_join(medium, by = "time")
coord <- data.frame(
lat = round(data.b[, 115], 6),
lon = round(data.b[, 116], 6))
ENV <-data.b[,115:120] #all the env data
SPE <-data.b[,3:114] #all annotation/taxa data
#write.xlsx(SPE, "SPE_data.xlsx") #to check the df if needed
In some cases, a dataset is needed where all data is aggregated by site. Here, I will call this Dataset C (short dataset) in contrast to Dataset B (full dataset, rows being annotation timestamps)
# Example definition for get_mode() if you don't have one
get_mode <- function(x) {
ux <- unique(na.omit(x))
ux[which.max(tabulate(match(x, ux)))]
}
data.c <- data.b %>%
group_by(dive_name) %>%
summarise(
# Sum all SPE columns (make sure they exist in data.b)
across(all_of(colnames(SPE)), sum, .names = "{.col}"),
# Mode of medium
medium = get_mode(medium),
# Mean of numeric columns that exist in ENV and data.b
across(all_of(intersect(colnames(ENV), colnames(data.b))) & where(is.numeric), mean, na.rm = TRUE),
# First of coord columns (make sure they exist)
across(all_of(colnames(coord)), first, .names = "{.col}"),
.groups = "drop"
) %>%
as.data.frame()
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(...)`.
## ℹ In group 1: `dive_name = "EX2104_Dive03 Hopscotch Seamount"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
Now I want to create again SPE and ENV dfs from the combined dataset C
ENV2<-data.c[,114:119] #all the env data
SPE2<-data.c[,2:113] #all annotation/taxa data
#write.xlsx(SPE2, "SPE_data.xlsx") #to check the df if needed
Since there might be patterns in the family instead of morphotypes, the data is aggregated into family level. Note that I have not used this dataset for the Corner Rise analysis, but still migth be useful for further exploration.
# Merge taxa information with the column names of SPE, Define a function to replace NA values with the next available information, and replace empty strings with NA
taxa[taxa == ""] <- NA
fill_family <- function(df) {
df %>%
mutate(Family = ifelse(is.na(Family),
ifelse(!is.na(Class), Class, Phylum),
Family))}
taxa_filled <- fill_family(taxa)
column_names <- data.frame(Morphotype = colnames(SPE), stringsAsFactors = FALSE)
taxa_mapping <- taxa_filled %>%
dplyr::select(Morphotype, Family) %>%
dplyr::mutate(Family = as.factor(Family))
# Merge column names with taxa information
column_info <- merge(column_names, taxa_mapping, by = "Morphotype", all.x = TRUE)
# Group columns by family
grouped_columns <- column_info %>%
group_by(Family) %>%
summarise(Morphotype = list(Morphotype), .groups = 'drop')
# Initialize a new data frame for combined data
SPE.fam <- data.frame(matrix(ncol = nrow(grouped_columns), nrow = nrow(SPE)))
colnames(SPE.fam) <- grouped_columns$Family
# Combine columns by family
for (i in 1:nrow(grouped_columns)) {
family_columns <- grouped_columns$Morphotype[[i]]
# Ensure that the selected columns are treated as a data frame
selected_data <- SPE[ , family_columns, drop = FALSE]
# Sum columns belonging to the same family
SPE.fam[[i]] <- rowSums(selected_data, na.rm = TRUE)
}
# print to check
# SPE.fam
To continue with the ordination methods, the environmnetal and species data needs to be transformed. This is the case when count data is used, and many zeros are present e.g. Most commonly used are Hellinger transformation for species data, and standardisation for environmental data. This is needed later on.
# Morphotype data
# Hellinger transformation
# SPE is the "full" data, so all annotations
SPE.hel <- decostand(SPE, method="hellinger")
plotNormalHistogram(SPE.hel) # checking the distribution
#SPE2 is the aggregated data, grouped by sites
SPE2.hel <- decostand(SPE2, method="hellinger")
plotNormalHistogram(SPE2.hel) # checking the distribution
# Hellinger pre-transformation of the species data
# Compute square root of relative abundances per site
SPE.hel3 <-decostand(SPE.fam, method="hellinger") # full data, not grouped by site
# Environmental data
# Standardization
# Convert character to factor, then to numeric
ENV$medium <- as.numeric(as.factor(ENV$medium))
env.z <- decostand(ENV, method="standardize") # After Borcard
ENV2$medium <- as.numeric(as.factor(ENV2$medium))
env2.z <- decostand(ENV2, method= "standardize") # After Borcard
Here the morphotype data is explored in more detail. Which family most speciose and so on.
# Which is the most abundant morphotype?
# Sum the counts for each morphotype (column) across all locations (rows)
total_abundance <- colSums(SPE)
# Sort the families by total abundance in descending order
most_abundant_taxa <- sort(total_abundance, decreasing = TRUE)
# View the sorted list of most abundant taxa
most_abundant_taxa
## CAO39 P07 PH24 CAO28 PH13 PD08 PD17 PH10 PD03 CAO29 CAO46 CAO02 CAH04
## 900 362 322 286 266 262 218 196 146 127 114 111 82
## CAO21 PH07 PH55 CAH18 CAO05 CAH30 CA01 PH36 PH41 CAO30 PH40 PD22 CAH08
## 71 62 59 59 58 48 47 41 37 34 33 33 31
## CAH15 PH32 PH75 PH51 CAO11 PH43 PH69 PH52 PH71 PH58 PH15 CAO42 CAH20
## 30 29 27 27 26 24 24 24 22 22 20 19 17
## PH39 CAO23 PH59 PH74 PH77 CAH09 P08 PH27 CAO19 CAH22 CAO43 PD12 CAO41
## 16 15 14 14 14 11 11 9 9 9 9 8 8
## PH61 PH47 CAO33 CAO60 CAO56 CAO07 CAO17 CAO45 CAO25 PD14 CAH37 PH53 CAO10
## 8 7 7 7 7 6 6 6 6 6 6 5 5
## PD10 PH48 PD11 CAO34 CAO53 CAH21 CAH35 PH21 P14 CAH33 CAO55 PH66 CAO01
## 5 5 5 5 5 5 5 4 4 4 4 4 3
## CAO04 PH45 CAO31 PH33 PH82 PH34 CAO47 CAH23 PH70 CAO48 CAO35 CAO37 CAO38
## 3 3 3 2 2 2 2 2 2 2 2 2 2
## PH63 CAH36 P13 CAO13 CAO14 CAO16 PH35 CAO20 PH49 CAO22 PH78 CAO27 P10
## 2 2 1 1 1 1 1 1 1 1 1 1 1
## CAO49 CAO36 CAH27 PH73 CAH34 CAO54 CAO61 PH64
## 1 1 1 1 1 1 1 1
# Which is the most abundant family?
# Sum the counts for each morphotype (column) across all locations (rows)
total_abundance_fam <- colSums(SPE.fam)
most_abundant_family<- sort(total_abundance_fam, decreasing = TRUE)
most_abundant_family
## Primnoidae Porifera Keratoisididae Demospongiae
## 958 813 651 500
## Farreidae Euplectellidae Chrysogorgiidae Schizopathidae
## 394 330 202 180
## Rossellidae Anthozoa Euretidae Hexactinellida
## 145 84 76 71
## Antipathidae Myxillidae Coralliidae Madreporidae
## 60 33 30 30
## Tretodictyidae Dendrophylliidae Pheronematidae Leiopathidae
## 20 17 14 9
## Aphrocallistidae Polymastiidae Plexuridae Plexauridae
## 8 8 7 6
## Umbellulidae Cladopathidae Hyalonematidae Dendoricellidae
## 3 2 2 1
## Paramuriceidae
## 1
# Which taxa/families are depth generalists? Which are more specialised? (Occuring in only one depth e.g.)
# Combine species data with depth information
species_depth <- cbind(SPE.fam, depth = ENV$depth)
# count how many unique depths each morphotype appears in
species_depth_counts <- species_depth %>%
summarise(across(-depth, ~length(unique(depth[. > 0])))) %>%
t() %>%
as.data.frame()
colnames(species_depth_counts) <- "Unique_depths"
species_depth_counts$Species <- rownames(species_depth_counts)
# View family with the most and least depth occurrences
species_depth_counts <- species_depth_counts[order(species_depth_counts$Unique_depths), ]
# Define threshold (e.g., species in < 10 depths = specialist, otherwise generalist)
species_depth_counts$Category <- ifelse(species_depth_counts$Unique_depths < 10, "Specialist", "Generalist")
# Convert SPE to long format for ggplot
species_long <- SPE.fam %>% #HERE CHANGE TO SPE if you want to check for morphotypes instead
mutate(depth = ENV$depth) %>%
pivot_longer(cols = -depth, names_to = "Species", values_to = "Abundance") %>%
filter(Abundance > 0) # Remove absences
# Calculate depth range for each species
species_ranges <- species_long %>%
group_by(Species) %>%
summarise(Mindepth = min(depth), Maxdepth = max(depth)) %>%
mutate(depthRange = Maxdepth - Mindepth,
Category = ifelse(depthRange > 1000, "Generalist", "Specialist"))
# Merge category information with long species data
species_long <- species_long %>%
left_join(species_ranges, by = "Species")
# Plot with generalists highlighted
ggplot(species_long, aes(x = reorder(Species, depth), y = depth, color = Category)) +
geom_boxplot() +
coord_flip() +
scale_color_manual(values = c("Generalist" = "red", "Specialist" = "black")) +
labs(y = "depth (m)", x = "Species",
title = "Species depth Ranges",
color = "Category") +
theme_minimal()
# Shannon's H' (for each dive)
H <- tapply(data.a$Morphotype, data.a$dive_name, function(x) {
species_abundance <- table(x)
diversity_index <- diversity(species_abundance, index = "shannon")
return(diversity_index)
})
# Simpsons (for each dive)
S <- tapply(data.a$Morphotype, data.a$dive_name, function(x) {
species_abundance <- table(x)
diversity_index <- diversity(species_abundance, index = "simpson")
return(diversity_index)
})
# Observed Richness
richness <- tapply(data.a$Morphotype, data.a$dive_name, function(x) {
species_abundance <- table(x)
diversity_index <- specnumber(species_abundance)
return(diversity_index)
})
# Pielou's Evenness
#Evenness is a measure of the relative abundance of the different species in the same area.
#Low J indicates that 1 or few species dominate the community
evenness <- H/log(richness)
# Create alpha diversity dataframe, include info about environmnent and locations
div.df <- cbind(shannon = H, richness = richness, pielou = evenness, simps = S, ENV2)
div.df$site <- data.c[,1]
kable(div.df) %>% kable_styling(font_size = 8, latex_options = c("striped"))
| shannon | richness | pielou | simps | medium | lat | lon | oxygen | depth | temp | site | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| EX2104_Dive03 Hopscotch Seamount | 2.276208 | 35 | 0.6402208 | 0.8397995 | 1 | 33.67625 | -52.99308 | 8.073393 | 2497.180 | 3.266768 | EX2104_Dive03 Hopscotch Seamount |
| EX2104_Dive04 Dumbbell Seamount | 2.586724 | 35 | 0.7275585 | 0.8746321 | 1 | 34.39127 | -51.77360 | 8.161735 | 2356.338 | 3.377768 | EX2104_Dive04 Dumbbell Seamount Seamount |
| EX2104_Dive05 Rockaway Seamount | 1.584695 | 9 | 0.7212260 | 0.7148451 | 1 | 35.81771 | -52.30680 | 7.677247 | 4164.104 | 2.245520 | EX2104_Dive05 Rockaway Seamount |
| EX2104_Dive06 Castle Rock Seamount | 2.961133 | 38 | 0.8140379 | 0.9130050 | 1 | 36.30088 | -51.34954 | 8.166332 | 2176.312 | 3.740780 | EX2104_Dive06 Castle Rock Seamount |
| EX2104_Dive07 Corner Rise 1 Seamount | 2.453429 | 31 | 0.7144550 | 0.8526306 | 1 | 35.88859 | -51.52052 | 8.151222 | 2497.275 | 3.438871 | EX2104_Dive07 Corner Rise 1 Seamount |
| EX2104_Dive08 MacGregor Seamount | 2.184710 | 31 | 0.6362020 | 0.7895265 | 2 | 35.05412 | -48.97419 | 7.998123 | 1075.383 | 4.876061 | EX2104_Dive08 MacGregor Seamount |
| EX2104_Dive09 Yakutat Shallow Seamount | 2.515632 | 28 | 0.7549454 | 0.8860117 | 1 | 35.17906 | -48.11690 | 8.270112 | 1273.898 | 4.338314 | EX2104_Dive09 Yakutat Shallow Seamount |
| EX2104_Dive10 Yakutat Deep Seamount | 2.928752 | 29 | 0.8697639 | 0.9291942 | 1 | 35.26362 | -48.00214 | 8.194902 | 1843.528 | 3.864164 | EX2104_Dive10 Yakutat Deep Seamount |
| EX2104_Dive11 Caloosahatchee Seamount | 1.621825 | 28 | 0.4867124 | 0.6261522 | 1 | 34.65083 | -49.65224 | 8.014477 | 1228.175 | 4.835937 | EX2104_Dive11 Caloosahatchee Seamount |
# can also sort Locations by the median of 'shannon'
# div.df$site <- with(div.df, reorder(site, shannon, median))
plot.shan <- ggplot(div.df, aes(x = site, y = shannon, colour = depth, shape = as.factor(medium))) +
geom_point(size = 3) +
scale_colour_viridis_c(option = "cividis", begin = 1, end = 0) +
ylab("Shannon's H") +
xlab("") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4),
plot.margin = unit(c(0, 0, 0, 0), "cm")) # Ensure margins are set
#scale_shape_manual(values = c(15, 16, 17, 18, 19, 20, 21, 22, 23)) # Customize the shapes as needed
plot.shan
plot.rich <- ggplot(div.df, aes(x = site, y = richness, colour = depth, shape = as.factor(medium))) +
geom_point(size = 3) +
scale_colour_viridis_c(option = "cividis", begin = 1, end = 0) +
ylab("Richness") +
xlab("") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4),
plot.margin = unit(c(0, 0, 0, 0), "cm")) # Ensure margins are set
#scale_shape_manual(values = c(15, 16, 17, 18, 19, 20, 21, 22, 23)) # Customize the shapes as needed
plot.rich
plot.even <- ggplot(div.df, aes(x = site, y = pielou, colour = depth, shape = as.factor(medium))) +
geom_point(size = 3) +
scale_colour_viridis_c(option = "cividis", begin = 1, end = 0) +
ylab("Pielous eveness") +
xlab("") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.4),
plot.margin = unit(c(0, 0, 0, 0), "cm")) # Ensure margins are set
#scale_shape_manual(values = c(15, 16, 17, 18, 19, 20, 21, 22, 23)) # Customize the shapes as needed
plot.even
> Now, we explore the alpha diversity further
# What impacts have the various environmental factors on the diversity?
# Define response variables
responses <- c("shannon", "simps", "pielou", "richness")
# Define predictor variables
predictors <- c("depth", "medium", "temp")
# Run linear models and print summaries
lm_results <- lapply(responses, function(resp) {
cat("\n### Response:", resp, "###\n")
lapply(predictors, function(pred) {
cat("\nModel:", resp, "~", pred, "\n")
print(summary(lm(as.formula(paste(resp, "~", pred)), data = div.df)))
})})
##
## ### Response: shannon ###
##
## Model: shannon ~ depth
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84106 -0.29814 0.05872 0.27123 0.62212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6233407 0.4388459 5.978 0.000554 ***
## depth -0.0001306 0.0001904 -0.686 0.514786
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5111 on 7 degrees of freedom
## Multiple R-squared: 0.06299, Adjusted R-squared: -0.07087
## F-statistic: 0.4706 on 1 and 7 DF, p-value: 0.5148
##
##
## Model: shannon ~ medium
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78135 -0.08984 0.08738 0.22067 0.59508
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.5474 0.6417 3.969 0.0054 **
## medium -0.1813 0.5558 -0.326 0.7537
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.524 on 7 degrees of freedom
## Multiple R-squared: 0.01498, Adjusted R-squared: -0.1257
## F-statistic: 0.1065 on 1 and 7 DF, p-value: 0.7537
##
##
## Model: shannon ~ temp
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7717 -0.2106 0.1227 0.2587 0.6168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.17616 0.86314 2.521 0.0397 *
## temp 0.04495 0.22381 0.201 0.8465
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5264 on 7 degrees of freedom
## Multiple R-squared: 0.00573, Adjusted R-squared: -0.1363
## F-statistic: 0.04034 on 1 and 7 DF, p-value: 0.8465
##
##
## ### Response: simps ###
##
## Model: simps ~ depth
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20621 -0.04408 0.03058 0.05402 0.10183
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.423e-01 9.078e-02 9.279 3.5e-05 ***
## depth -8.126e-06 3.939e-05 -0.206 0.842
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1057 on 7 degrees of freedom
## Multiple R-squared: 0.006042, Adjusted R-squared: -0.136
## F-statistic: 0.04255 on 1 and 7 DF, p-value: 0.8424
##
##
## Model: simps ~ medium
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.20338 0.00000 0.02310 0.05648 0.09966
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.86954 0.12869 6.757 0.000263 ***
## medium -0.04001 0.11144 -0.359 0.730188
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1051 on 7 degrees of freedom
## Multiple R-squared: 0.01808, Adjusted R-squared: -0.1222
## F-statistic: 0.1289 on 1 and 7 DF, p-value: 0.7302
##
##
## Model: simps ~ temp
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.18410 -0.02017 0.02282 0.06879 0.10534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.87793 0.17265 5.085 0.00142 **
## temp -0.01399 0.04477 -0.313 0.76371
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1053 on 7 degrees of freedom
## Multiple R-squared: 0.01377, Adjusted R-squared: -0.1271
## F-statistic: 0.0977 on 1 and 7 DF, p-value: 0.7637
##
##
## ### Response: pielou ###
##
## Model: pielou ~ depth
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.195351 -0.043374 -0.003286 0.071596 0.170401
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.475e-01 9.910e-02 6.534 0.000324 ***
## depth 2.811e-05 4.301e-05 0.654 0.534212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1154 on 7 degrees of freedom
## Multiple R-squared: 0.05753, Adjusted R-squared: -0.07711
## F-statistic: 0.4273 on 1 and 7 DF, p-value: 0.5342
##
##
## Model: pielou ~ medium
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.229403 -0.001660 0.005111 0.038830 0.153649
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.79603 0.14136 5.631 0.00079 ***
## medium -0.07991 0.12242 -0.653 0.53474
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1154 on 7 degrees of freedom
## Multiple R-squared: 0.05738, Adjusted R-squared: -0.07728
## F-statistic: 0.4261 on 1 and 7 DF, p-value: 0.5347
##
##
## Model: pielou ~ temp
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.168358 -0.061335 -0.009374 0.075383 0.166866
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.89308 0.18123 4.928 0.0017 **
## temp -0.04922 0.04699 -1.047 0.3298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1105 on 7 degrees of freedom
## Multiple R-squared: 0.1355, Adjusted R-squared: 0.01196
## F-statistic: 1.097 on 1 and 7 DF, p-value: 0.3298
##
##
## ### Response: richness ###
##
## Model: richness ~ depth
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.046 -5.617 -1.745 6.840 8.932
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40.039037 6.318429 6.337 0.00039 ***
## depth -0.005041 0.002742 -1.839 0.10858
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.358 on 7 degrees of freedom
## Multiple R-squared: 0.3256, Adjusted R-squared: 0.2293
## F-statistic: 3.38 on 1 and 7 DF, p-value: 0.1086
##
##
## Model: richness ~ medium
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.125 -1.125 0.000 5.875 8.875
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 27.250 10.943 2.490 0.0416 *
## medium 1.875 9.477 0.198 0.8488
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.935 on 7 degrees of freedom
## Multiple R-squared: 0.00556, Adjusted R-squared: -0.1365
## F-statistic: 0.03914 on 1 and 7 DF, p-value: 0.8488
##
##
## Model: richness ~ temp
##
## Call:
## lm(formula = as.formula(paste(resp, "~", pred)), data = div.df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.4865 -3.8488 -0.7277 7.4483 8.8243
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.441 13.164 0.945 0.376
## temp 4.474 3.413 1.311 0.231
##
## Residual standard error: 8.029 on 7 degrees of freedom
## Multiple R-squared: 0.197, Adjusted R-squared: 0.08231
## F-statistic: 1.718 on 1 and 7 DF, p-value: 0.2314
#Plot with smoothing regression to show trend...
depth.reg <- ggplot(div.df, aes(x = depth, y = shannon, colour = site)) +
geom_smooth(method = "lm", colour = "black", fill = "grey90") +
geom_point(size = 3) +
scale_colour_viridis_d(option = "magma", begin = 0.2, end = 0.8) +
xlab(bquote(depth (m))) +
ylab("Shannon's H'") +
ylim(0,3.5) +
theme_bw()
depth.reg
## `geom_smooth()` using formula = 'y ~ x'
sediments.reg <- ggplot(div.df, aes(x = medium, y = shannon, colour = site)) +
geom_point(size = 3) +
scale_colour_viridis_d(option = "magma", begin = 0.2, end = 0.8) +
xlab("Primary sediment consistency (m)") +
ylab("Shannon's H'") +
ylim(0,3.5) +
theme_bw()
sediments.reg
# Run linear models and print summaries
anova_results <- lapply(responses, function(resp) {
cat("\n### Response:", resp, "###\n")
lapply(predictors, function(pred) {
cat("\nModel:", resp, "~", pred, "\n")
print(summary(aov(as.formula(paste(resp, "~", pred)), data = div.df)))
})})
##
## ### Response: shannon ###
##
## Model: shannon ~ depth
## Df Sum Sq Mean Sq F value Pr(>F)
## depth 1 0.1229 0.1229 0.471 0.515
## Residuals 7 1.8282 0.2612
##
## Model: shannon ~ medium
## Df Sum Sq Mean Sq F value Pr(>F)
## medium 1 0.0292 0.02923 0.106 0.754
## Residuals 7 1.9219 0.27456
##
## Model: shannon ~ temp
## Df Sum Sq Mean Sq F value Pr(>F)
## temp 1 0.0112 0.01118 0.04 0.847
## Residuals 7 1.9400 0.27714
##
## ### Response: simps ###
##
## Model: simps ~ depth
## Df Sum Sq Mean Sq F value Pr(>F)
## depth 1 0.00048 0.000475 0.043 0.842
## Residuals 7 0.07823 0.011175
##
## Model: simps ~ medium
## Df Sum Sq Mean Sq F value Pr(>F)
## medium 1 0.00142 0.001423 0.129 0.73
## Residuals 7 0.07728 0.011040
##
## Model: simps ~ temp
## Df Sum Sq Mean Sq F value Pr(>F)
## temp 1 0.00108 0.001083 0.098 0.764
## Residuals 7 0.07762 0.011088
##
## ### Response: pielou ###
##
## Model: pielou ~ depth
## Df Sum Sq Mean Sq F value Pr(>F)
## depth 1 0.00569 0.005691 0.427 0.534
## Residuals 7 0.09324 0.013319
##
## Model: pielou ~ medium
## Df Sum Sq Mean Sq F value Pr(>F)
## medium 1 0.00568 0.005677 0.426 0.535
## Residuals 7 0.09325 0.013321
##
## Model: pielou ~ temp
## Df Sum Sq Mean Sq F value Pr(>F)
## temp 1 0.01340 0.01340 1.097 0.33
## Residuals 7 0.08552 0.01222
##
## ### Response: richness ###
##
## Model: richness ~ depth
## Df Sum Sq Mean Sq F value Pr(>F)
## depth 1 183 183.01 3.38 0.109
## Residuals 7 379 54.14
##
## Model: richness ~ medium
## Df Sum Sq Mean Sq F value Pr(>F)
## medium 1 3.1 3.12 0.039 0.849
## Residuals 7 558.9 79.84
##
## Model: richness ~ temp
## Df Sum Sq Mean Sq F value Pr(>F)
## temp 1 110.7 110.73 1.718 0.231
## Residuals 7 451.3 64.47
#Explore the medium/substrate - OBS: Only one has predominantly gravel, all others are bedrock ...
sed.plot <- ggplot(div.df, aes(x = medium, y = shannon, fill = as.factor(medium))) +
geom_boxplot(aes(fill = as.factor(medium))) +
geom_point(size = 3, aes(colour = site)) +
scale_colour_viridis_d(option = "magma", begin = 0.2, end = 0.8) +
scale_fill_manual(values = c("grey60", "grey90", "black"), guide = FALSE) +
ylab("Shannon's H'") +
xlab('')+
theme_bw()
sed.plot
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# To calculate Beta diversity (across sites) pairwise dissimilarities/distances need to be calculated
# Prepare data: Convert morphotype data to presence-absence matrix
SPE.pa <- table(data.a$dive_name, data.a$Morphotype) #this is an abundance matrix with actual numbers.This is better than using just 0/1 presence/absence because bray curtis is sensitive to abundances. It also adds some more value to the data.
# Calculate beta diversity using Bray-Curtis dissimilarity
bray.df <- vegdist(SPE.pa, method = "bray") %>%
as.matrix() %>%
as.data.frame()
# The matrix can be further used to explore how distant sites are.
# 1) Are sites more similar that are closer to each other? GEOGRAPHIC DISTANCE
divesites <- unique(data.a$dive_name)
# Create a list to store the first coordinate values and Dive names for each Dive site
first_coordinates_list <- lapply(divesites, function(dive_site) {
# Extract the first coordinate values for the Dive site
first_coordinates <- data.a %>%
filter(dive_name == dive_site) %>%
slice(1) %>%
dplyr::select(dive_name, lat, lon)
return(first_coordinates)
})
# Extract latitude and longitude values from the list of data frames
coordinates <- lapply(first_coordinates_list, function(df) df[, c("lon", "lat")])
# Convert the list of coordinates to a matrix
coordinates_matrix <- do.call(rbind, coordinates)
# Calculate distances between points using the distm function with Haversine formula (for km)
distance_matrix <- distm(coordinates_matrix)
rownames(distance_matrix) <- data.c$dive_name
colnames(distance_matrix) <- data.c$dive_name
# Print the distance matrix - if wanted
# print(distance_matrix)
heatmap(distance_matrix)
# Convert depth_distance_matrix to long format
geo_long <- as.data.frame(as.table(distance_matrix))
colnames(geo_long) <- c("Site1", "Site2", "Geo")
# Convert bray_curtis_matrix to long format
bray_long <- as.data.frame(as.table(as.matrix(bray.df)))
colnames(bray_long) <- c("Site1", "Site2", "Bray_Curtis")
# Merge the dataframes on Site1 and Site2
combined_df <- merge(geo_long, bray_long, by = c("Site1", "Site2"))
# Remove rows where Site1 == Site2 (comparisons with self)
combined_df <- combined_df[combined_df$Site1 != combined_df$Site2, ]
# 2) Are sites more similar that share similar depths? depth DISTANCE
# Create an empty matrix to store the depth distances
depth_distance_matrix <- matrix(NA, nrow = nrow(data.c), ncol = nrow(data.c)) # using data.c as this has the average depths for each site ready
# Fill the matrix with depth differences
for (i in 1:nrow(data.c)) {
for (j in 1:nrow(data.c)) {
# Calculate the absolute difference in depths between locations i and j
depth_distance_matrix[i, j] <- abs(data.c$depth[i] - data.c$depth[j])
}}
# Convert matrices to DISTANCE objects (crucial!)
bray_dist <- as.dist(as.matrix(bray.df)) # Bray-Curtis
geo_dist <- as.dist(distance_matrix) # Geographic
depth_dist <- as.dist(depth_distance_matrix) # Depth
# Run Mantel tests (9999 permutations for robust p-values)
mantel_geo <- mantel(bray_dist, geo_dist, method = "pearson", permutations = 9999)
mantel_depth <- mantel(bray_dist, depth_dist, method = "pearson", permutations = 9999)
# Print results
print(mantel_geo)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = bray_dist, ydis = geo_dist, method = "pearson", permutations = 9999)
##
## Mantel statistic r: 0.4195
## Significance: 0.015
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.226 0.303 0.372 0.454
## Permutation: free
## Number of permutations: 9999
print(mantel_depth)
##
## Mantel statistic based on Pearson's product-moment correlation
##
## Call:
## mantel(xdis = bray_dist, ydis = depth_dist, method = "pearson", permutations = 9999)
##
## Mantel statistic r: 0.7486
## Significance: 1e-04
##
## Upper quantiles of permutations (null model):
## 90% 95% 97.5% 99%
## 0.300 0.364 0.414 0.491
## Permutation: free
## Number of permutations: 9999
# Replace nested loops for depth matrix with:
depth_distance_matrix <- as.matrix(dist(data.c$depth, diag = TRUE, upper = TRUE))
# More efficient haversine matrix (replace distm approach):
library(geodist)
geo_matrix <- geodist(coordinates_matrix, measure = "haversine")
# Use this for ALL Mantel plot visualizations (no statistical labels)
plot_mantel <- function(dist_matrix, xlab) {
df <- data.frame(
Distance = as.vector(as.dist(dist_matrix)),
Bray = as.vector(as.dist(as.matrix(bray.df)))
)
ggplot(df, aes(x = Distance, y = Bray)) +
geom_point(alpha = 0.6) +
geom_smooth(method = "lm", color = "red") +
labs(x = xlab, y = "Bray-Curtis Dissimilarity") +
theme_minimal()
}
plot_mantel(geo_matrix, "Geographic Distance (m)")
## `geom_smooth()` using formula = 'y ~ x'
plot_mantel(depth_distance_matrix, "Depth Difference (m)")
## `geom_smooth()` using formula = 'y ~ x'
Use comm matrix to infer nestedness and turnover between sites
# Replace 'morphotype' with your actual column name for coral/sponge ID
community_matrix <- data.a %>%
group_by(dive_name, Morphotype) %>% # Use dive_id or dive_name as grouping var
summarise(count = n(), .groups = "drop") %>%
pivot_wider(names_from = Morphotype, values_from = count, values_fill = 0) %>%
column_to_rownames("dive_name")
# Convert to presence/absence for Jaccard-based analysis
comm_pa <- (community_matrix > 0) * 1
# Run beta diversity partitioning
beta_core <- betapart.core(comm_pa)
beta_results <- beta.multi(beta_core, index.family = "jaccard")
print(beta_results)
## $beta.JTU
## [1] 0.8825348
##
## $beta.JNE
## [1] 0.02246522
##
## $beta.JAC
## [1] 0.905
beta_pair <- beta.pair(beta_core, index.family = "jaccard")
# For example, visualize turnover:
heatmap(as.matrix(beta_pair$beta.jtu), main = "Turnover between dives")
# Assume comm_pa is your presence-absence community matrix (sites x species)
beta_pair <- beta.pair(comm_pa, index.family = "sor")
turnover_df <- as.data.frame(as.matrix(beta_pair$beta.sim)) %>%
rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "value")
nestedness_df <- as.data.frame(as.matrix(beta_pair$beta.sne)) %>%
rownames_to_column("Var1") %>%
pivot_longer(-Var1, names_to = "Var2", values_to = "value")
# Combine into one data frame with a label
turnover_df$component <- "Turnover"
nestedness_df$component <- "Nestedness"
beta_long <- rbind(turnover_df, nestedness_df)
colnames(beta_long) <- c("Site1", "Site2", "Dissimilarity", "Component")
# Remove self comparisons (Site1 == Site2)
beta_long <- beta_long[beta_long$Site1 != beta_long$Site2, ]
ggplot(beta_long, aes(x=Component, y=Dissimilarity)) +
geom_boxplot(fill = c("skyblue", "orange")) +
labs(title="Pairwise Beta Diversity Components", y="Dissimilarity", x="Component") +
theme_minimal()
#Make a vector that has the clusters we identified before:
group_vector <- c("cluster1", "cluster1", "deep", "cluster1", "cluster1",
"cluster2", "cluster2", "cluster2", "cluster2")
# Create a data frame with group info for each site
group_df <- data.frame(site = rownames(comm_pa), group = group_vector)
# Function to get turnover values between all pairs and their group combinations
get_pairwise_turnover <- function(beta_matrix, group_df) {
pairs <- combn(rownames(beta_matrix), 2)
results <- data.frame(
site1 = pairs[1,],
site2 = pairs[2,],
turnover = mapply(function(x,y) beta_matrix[x,y], pairs[1,], pairs[2,]))
# Add group info for each site
results <- merge(results, group_df, by.x = "site1", by.y = "site")
colnames(results)[4] <- "group1"
results <- merge(results, group_df, by.x = "site2", by.y = "site")
colnames(results)[5] <- "group2"
# Label pairs as within-group or between-group
results$pair_type <- ifelse(results$group1 == results$group2, "Within group", "Between groups")
return(results)
}
turnover_pairs <- get_pairwise_turnover(as.matrix(beta_pair$beta.sim), group_df)
# Boxplot turnover by pair type
ggplot(turnover_pairs, aes(x=pair_type, y=turnover, fill=pair_type)) +
geom_boxplot() +
labs(title="Species Turnover Within and Between Groups", y="Turnover (β_SIM)", x="Pair Type") +
scale_fill_manual(values = c("Within group"="lightgreen", "Between groups"="pink")) +
theme_minimal()
## STATS
set.seed(123) # for reproducibility
# Extract turnover values by pair type
within_turnover <- turnover_pairs$turnover[turnover_pairs$pair_type == "Within group"]
between_turnover <- turnover_pairs$turnover[turnover_pairs$pair_type == "Between groups"]
# Observed difference in means
obs_diff <- mean(between_turnover) - mean(within_turnover)
# Combine all turnover values and labels
all_values <- turnover_pairs$turnover
labels <- turnover_pairs$pair_type
# Permutation test
n_perm <- 999
perm_diffs <- numeric(n_perm)
for (i in 1:n_perm) {
perm_labels <- sample(labels) # shuffle labels
perm_within <- all_values[perm_labels == "Within group"]
perm_between <- all_values[perm_labels == "Between groups"]
perm_diffs[i] <- mean(perm_between) - mean(perm_within)
}
# Calculate p-value (two-tailed)
p_value <- (sum(abs(perm_diffs) >= abs(obs_diff)) + 1) / (n_perm + 1)
p_value
## [1] 0.001
ggplot(turnover_pairs, aes(x = pair_type, y = turnover, fill = pair_type)) +
geom_boxplot() +
labs(title = "Species Turnover Within and Between Groups",
y = "Turnover (β_SIM)",
x = "Pair Type") +
scale_fill_manual(values = c("Within group" = "lightgreen", "Between groups" = "pink")) +
theme_minimal() +
annotate("text", x = 1.5, y = max(turnover_pairs$turnover) * 0.95,
label = paste0("p = ", signif(p_value, 3)), size = 5)
#Multipatt = Identify species that are characteristic of particular groups (here, east vs west). So e.g. "These taxa are diagnostic of Cluster X"
group_vector <- cluster_assignments$cluster[match(rownames(community_matrix), cluster_assignments$dive_name)]
indval <- multipatt(community_matrix, group_vector, func = "IndVal.g", control = how(nperm = 999))
summary(indval, indvalcomp = TRUE, alpha = 0.05)
##
## Multilevel pattern analysis
## ---------------------------
##
## Association function: IndVal.g
## Significance level (alpha): 0.05
##
## Total number of species: 112
## Selected number of species: 6
## Number of species associated to 1 group: 6
## Number of species associated to 2 groups: 0
##
## List of species associated to each combination:
##
## Group cluster1 #sps. 3
## A B stat p.value
## CAO11 1 1 1 0.015 *
## CAO21 1 1 1 0.015 *
## PD03 1 1 1 0.015 *
##
## Group cluster2 #sps. 3
## A B stat p.value
## CAO29 1.0000 1.0000 1.000 0.021 *
## PD17 1.0000 1.0000 1.000 0.021 *
## CAO28 0.9965 1.0000 0.998 0.036 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Meaning:
#Species CAO11, CAO21, and PD03 are perfect indicators of cluster1. They only occur in that group (A = 1) and are always found there (B = 1).
# stat = 1 is the maximum possible IndVal. p.value < 0.05 means these are statistically significant indicator species.
# Same for cluster2 (east) with different species.
# Cluster 3 (deep site 5) represents a distinct community—it’s compositionally different enough that it doesn't share indicator species with the other clusters.
# It has low species turnover (i.e., species are widespread). It’s functionally or taxonomically unique but not strongly dominated by one taxon.
# Or it’s ecotonal (a transition zone between east/west clusters).
#The deep site doesnt work, because its only one location. But I can check separately, if there is any morphotype singularly occuring in the deep site.
comm_pa <- community_matrix
comm_pa[comm_pa > 0] <- 1 # Convert to binary presence/absence
deep_site <- "EX2104_Dive05 Rockaway Seamount"
cluster1_sites <- rownames(comm_pa)[group_vector == "cluster1"]
cluster2_sites <- rownames(comm_pa)[group_vector == "cluster2"]
# Morphotypes present in the deep site
species_deep <- names(which(unlist(comm_pa[deep_site, ]) == 1))
species_deep
## [1] "CAH04" "PH10" "PH13" "PH82" "CAH23" "CAO45" "PD22" "PH69" "PH70"
species_cluster1 <- names(which(colSums(comm_pa[cluster1_sites, ]) > 0))
species_cluster1
## [1] "CAH04" "CAH30" "CAO01" "CAO02" "CAO04" "CAO07" "CAO10" "CAO11" "CAO13"
## [10] "CAO14" "CAO16" "CAO17" "CAO21" "CAO46" "P13" "P14" "PD03" "PD08"
## [19] "PD10" "PH07" "PH10" "PH13" "PH15" "PH21" "PH24" "PH27" "PH32"
## [28] "PH33" "PH34" "PH39" "PH48" "PH51" "PH53" "PH75" "PH82" "CAO05"
## [37] "CAO19" "CAO20" "CAO22" "CAO47" "PD11" "PH35" "PH36" "PH40" "PH41"
## [46] "PH43" "PH45" "PH47" "PH49" "PH71" "CAH08" "CAH09" "CAO23" "CAO25"
## [55] "CAO27" "CAO28" "CAO48" "PD12" "PD14" "PH52" "PH55" "PH58" "PH78"
## [64] "CAO49" "P10" "PH59"
species_cluster2 <- names(which(colSums(comm_pa[cluster2_sites, ]) > 0))
species_cluster2
## [1] "CAH04" "CAH30" "CAO02" "CAO10" "PD08" "PD10" "PH07" "PH10" "PH13"
## [10] "PH15" "PH24" "PH32" "PH48" "PH75" "PH40" "PH43" "PH71" "PD22"
## [19] "CAH08" "CAH09" "CAO28" "PD14" "PH52" "PH55" "PH58" "PH59" "CA01"
## [28] "CAH15" "CAH18" "CAH22" "CAH37" "CAO29" "CAO30" "CAO31" "CAO33" "CAO34"
## [37] "CAO35" "CAO36" "CAO37" "CAO38" "CAO39" "CAO41" "P07" "P08" "PD17"
## [46] "PH61" "PH63" "CAH20" "CAH27" "CAH33" "CAH34" "CAO42" "CAO43" "CAO53"
## [55] "PH73" "PH74" "CAH21" "CAO54" "CAO55" "CAO56" "CAO60" "CAO61" "PH77"
## [64] "CAH35" "CAH36" "PH64" "PH66"
other_species <- names(which(colSums(comm_pa[c(cluster1_sites, cluster2_sites), ]) > 0))
# Unique to deep site
unique_to_deep <- setdiff(species_deep, other_species)
unique_to_deep
## [1] "CAH23" "CAO45" "PH69" "PH70"
#So now we can say that "CAH23" "CAO45" "PH69" "PH70" are only present in the deep site. Which we can maybe take as indicators?
beta_results <- beta.multi(comm_pa, index.family = "sor")
beta_results
## $beta.SIM
## [1] 0.7897649
##
## $beta.SNE
## [1] 0.03671915
##
## $beta.SOR
## [1] 0.826484
#Total beta diversity is high (β.SOR ≈ 0.83): There’s a lot of difference in species composition between your dives — which aligns with what you saw in the dbRDA.
#**Most of the variation is due to species turnover (β.SIM ≈ 0.79): Species are being replaced from one dive to another, rather than simply being lost or gained.
# Very little nestedness (β.SNE ≈ 0.037):This means communities are not just subsets of each other (e.g., Dive 5 isn’t just a "poorer" version of the east or west group). Instead, the communities have different species altogether.
To complement this, we can do a SIMPER analysis (done by Ellen also in Primer, but jsut adding this here into the R script now). SIMPER identifies taxa that DISTINGUISH the groups. So its complementary to multipatt above.
#Simper = Identify species that contribute most to dissimilarity between groups.
# Run SIMPER
simper_results <- simper(community_matrix, group_vector, permutations = 999)
summary(simper_results, indvalcomp = TRUE, alpha = 0.05)
##
## Contrast: cluster1_deep
##
## average sd ratio ava avb cumsum p
## PD08 0.13121 0.11053 1.18700 64.50000 0.00000 0.135 0.049 *
## PH24 0.11719 0.13234 0.88600 80.25000 0.00000 0.256 0.181
## PD03 0.08279 0.08020 1.03200 36.50000 0.00000 0.341 0.066 .
## PH10 0.07147 0.09527 0.75000 44.50000 13.00000 0.415 0.142
## PH69 0.05196 0.02004 2.59300 0.00000 24.00000 0.468 0.061 .
## CAO46 0.04530 0.06145 0.73700 28.50000 0.00000 0.515 0.114
## CAO21 0.04225 0.06017 0.70200 17.75000 0.00000 0.558 0.114
## CAO02 0.03588 0.04809 0.74600 24.00000 0.00000 0.595 0.118
## CAO05 0.03068 0.03805 0.80600 14.50000 0.00000 0.627 0.110
## CAH04 0.03007 0.02469 1.21800 12.75000 1.00000 0.658 0.021 *
## PH07 0.02507 0.01172 2.13900 12.75000 0.00000 0.684 0.012 *
## CAH30 0.02480 0.02654 0.93500 9.00000 0.00000 0.709 0.047 *
## PH13 0.02018 0.00851 2.37200 11.75000 1.00000 0.730 0.765
## PH51 0.01902 0.03412 0.55700 6.75000 0.00000 0.750 0.117
## PH36 0.01869 0.02198 0.85000 10.25000 0.00000 0.769 0.114
## PH41 0.01490 0.02590 0.57500 9.25000 0.00000 0.784 0.112
## PH52 0.01337 0.02019 0.66200 4.50000 0.00000 0.798 0.145
## PH40 0.01328 0.01743 0.76200 7.50000 0.00000 0.811 0.113
## CAO11 0.01313 0.00385 3.40900 6.50000 0.00000 0.825 0.001 ***
## CAO45 0.01299 0.00501 2.59300 0.00000 6.00000 0.838 0.061 .
## PH75 0.01247 0.02226 0.56000 4.50000 0.00000 0.851 0.217
## PH43 0.01240 0.01368 0.90700 5.75000 0.00000 0.864 0.089 .
## CAO23 0.01135 0.02093 0.54200 3.75000 0.00000 0.876 0.117
## PH32 0.01054 0.00960 1.09800 5.75000 0.00000 0.887 0.121
## PH71 0.01022 0.01847 0.55400 4.00000 0.00000 0.897 0.163
## PH15 0.00684 0.01278 0.53500 4.50000 0.00000 0.904 0.184
## PD22 0.00649 0.00251 2.59300 0.00000 3.00000 0.911 0.521
## PD12 0.00610 0.01220 0.50000 2.00000 0.00000 0.917 0.117
## PH39 0.00578 0.00671 0.86100 4.00000 0.00000 0.923 0.188
## CAO19 0.00549 0.00907 0.60600 2.25000 0.00000 0.929 0.115
## CAO25 0.00457 0.00915 0.50000 1.50000 0.00000 0.933 0.117
## CAH23 0.00433 0.00167 2.59300 0.00000 2.00000 0.938 0.061 .
## PH70 0.00433 0.00167 2.59300 0.00000 2.00000 0.942 0.061 .
## PH47 0.00382 0.00458 0.83400 1.75000 0.00000 0.946 0.131
## PD14 0.00372 0.00579 0.64300 1.25000 0.00000 0.950 0.121
## PH27 0.00351 0.00346 1.01500 2.25000 0.00000 0.954 0.109
## PH58 0.00305 0.00610 0.50000 1.00000 0.00000 0.957 0.540
## PD11 0.00267 0.00315 0.84900 1.25000 0.00000 0.960 0.141
## CAO07 0.00256 0.00178 1.43700 1.50000 0.00000 0.962 0.062 .
## CAH08 0.00229 0.00457 0.50000 0.75000 0.00000 0.965 0.857
## CAO17 0.00226 0.00365 0.61800 1.50000 0.00000 0.967 0.112
## P14 0.00221 0.00290 0.76400 1.00000 0.00000 0.969 0.101
## CAH09 0.00220 0.00289 0.76000 0.75000 0.00000 0.972 0.483
## PH53 0.00214 0.00267 0.80200 1.25000 0.00000 0.974 0.137
## PH21 0.00213 0.00143 1.49100 1.00000 0.00000 0.976 0.004 **
## PH82 0.00182 0.00138 1.32200 0.25000 1.00000 0.978 0.078 .
## PD10 0.00180 0.00212 0.84700 1.00000 0.00000 0.980 0.183
## PH48 0.00179 0.00139 1.28200 1.00000 0.00000 0.981 0.053 .
## CAO10 0.00175 0.00130 1.34900 1.00000 0.00000 0.983 0.081 .
## PH55 0.00152 0.00305 0.50000 0.50000 0.00000 0.985 0.847
## CAO48 0.00144 0.00167 0.86300 0.50000 0.00000 0.986 0.080 .
## PH45 0.00115 0.00229 0.50000 0.75000 0.00000 0.988 0.112
## PH33 0.00111 0.00145 0.76400 0.50000 0.00000 0.989 0.101
## CAO47 0.00106 0.00131 0.80600 0.50000 0.00000 0.990 0.110
## CAO01 0.00103 0.00207 0.50000 0.75000 0.00000 0.991 0.109
## CAO04 0.00103 0.00207 0.50000 0.75000 0.00000 0.992 0.109
## CAO27 0.00076 0.00152 0.50000 0.25000 0.00000 0.993 0.117
## CAO28 0.00076 0.00152 0.50000 0.25000 0.00000 0.993 0.993
## PH78 0.00076 0.00152 0.50000 0.25000 0.00000 0.994 0.117
## PH34 0.00073 0.00084 0.86400 0.50000 0.00000 0.995 0.177
## CAO49 0.00068 0.00135 0.50000 0.25000 0.00000 0.996 0.115
## P10 0.00068 0.00135 0.50000 0.25000 0.00000 0.996 0.115
## PH59 0.00068 0.00135 0.50000 0.25000 0.00000 0.997 0.601
## CAO20 0.00038 0.00076 0.50000 0.25000 0.00000 0.997 0.112
## CAO22 0.00038 0.00076 0.50000 0.25000 0.00000 0.998 0.112
## PH35 0.00038 0.00076 0.50000 0.25000 0.00000 0.998 0.112
## PH49 0.00038 0.00076 0.50000 0.25000 0.00000 0.999 0.112
## CAO13 0.00034 0.00069 0.50000 0.25000 0.00000 0.999 0.109
## CAO14 0.00034 0.00069 0.50000 0.25000 0.00000 0.999 0.109
## CAO16 0.00034 0.00069 0.50000 0.25000 0.00000 1.000 0.109
## P13 0.00034 0.00069 0.50000 0.25000 0.00000 1.000 0.109
## CA01 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH15 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH18 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH22 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH37 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO29 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO30 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO31 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO33 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO34 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO35 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO36 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO37 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO38 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO39 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO41 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## P07 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## P08 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PD17 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH61 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH63 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH20 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH27 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH33 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH34 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO42 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO43 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO53 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH73 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH74 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH21 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO54 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO55 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO56 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO60 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO61 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH77 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH35 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAH36 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH64 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH66 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Contrast: cluster1_cluster2
##
## average sd ratio ava avb cumsum p
## CAO39 0.11439 0.20414 0.56040 0.00000 225.00000 0.124 0.713
## P07 0.07808 0.10845 0.72000 0.00000 90.50000 0.208 0.396
## PH24 0.06988 0.07847 0.89060 80.25000 0.25000 0.283 0.296
## PD08 0.06694 0.05855 1.14340 64.50000 1.00000 0.356 0.173
## CAO28 0.05483 0.02827 1.93930 0.25000 71.25000 0.415 0.064 .
## PD17 0.04503 0.03986 1.12990 0.00000 54.50000 0.464 0.120
## PD03 0.04106 0.03881 1.05800 36.50000 0.00000 0.508 0.161
## PH10 0.03718 0.06571 0.56590 44.50000 1.25000 0.548 0.831
## PH13 0.03511 0.02084 1.68470 11.75000 54.50000 0.586 0.433
## CAO29 0.02942 0.02204 1.33470 0.00000 31.75000 0.618 0.112
## CAO46 0.02593 0.03650 0.71040 28.50000 0.00000 0.646 0.596
## CAO02 0.02109 0.02796 0.75440 24.00000 3.75000 0.669 0.698
## CAO21 0.02075 0.02867 0.72390 17.75000 0.00000 0.691 0.427
## CAO05 0.01577 0.01925 0.81930 14.50000 0.00000 0.708 0.276
## CAH18 0.01503 0.01849 0.81270 0.00000 14.75000 0.724 0.345
## PD22 0.01176 0.02192 0.53650 0.00000 7.50000 0.737 0.655
## CAH04 0.01120 0.00869 1.28880 12.75000 7.50000 0.749 0.699
## CAH30 0.01027 0.01035 0.99220 9.00000 3.00000 0.760 0.547
## PH36 0.01019 0.01280 0.79610 10.25000 0.00000 0.771 0.445
## CA01 0.00994 0.01801 0.55160 0.00000 11.75000 0.782 0.639
## PH07 0.00984 0.00595 1.65270 12.75000 2.75000 0.793 0.653
## PH55 0.00938 0.01031 0.91030 0.50000 14.25000 0.803 0.505
## PH51 0.00872 0.01536 0.56730 6.75000 0.00000 0.812 0.566
## PH41 0.00856 0.01484 0.57680 9.25000 0.00000 0.822 0.670
## PH40 0.00724 0.01015 0.71290 7.50000 0.75000 0.829 0.620
## CAO30 0.00719 0.01303 0.55160 0.00000 8.50000 0.837 0.639
## CAO11 0.00682 0.00315 2.16600 6.50000 0.00000 0.844 0.046 *
## PH75 0.00655 0.00819 0.79990 4.50000 2.25000 0.852 0.787
## CAH15 0.00634 0.01150 0.55160 0.00000 7.50000 0.858 0.639
## CAH20 0.00626 0.00759 0.82440 0.00000 4.25000 0.865 0.303
## PH43 0.00625 0.00705 0.88750 5.75000 0.25000 0.872 0.260
## CAO42 0.00621 0.01061 0.58510 0.00000 4.75000 0.879 0.526
## PH52 0.00570 0.00720 0.79190 4.50000 1.50000 0.885 0.763
## PH77 0.00549 0.01023 0.53650 0.00000 3.50000 0.891 0.524
## CAH08 0.00528 0.00423 1.24930 0.75000 7.00000 0.896 0.419
## CAO23 0.00509 0.00946 0.53740 3.75000 0.00000 0.902 0.601
## PH71 0.00493 0.00724 0.68080 4.00000 1.50000 0.907 0.728
## PH32 0.00478 0.00421 1.13410 5.75000 1.50000 0.912 0.787
## PH15 0.00437 0.00673 0.65000 4.50000 0.50000 0.917 0.807
## P08 0.00362 0.00589 0.61350 0.00000 2.75000 0.921 0.485
## PH39 0.00347 0.00399 0.86940 4.00000 0.00000 0.925 0.292
## PH58 0.00337 0.00246 1.37000 1.00000 4.50000 0.928 0.653
## PH59 0.00295 0.00475 0.62070 0.25000 3.25000 0.932 0.640
## CAO56 0.00274 0.00512 0.53650 0.00000 1.75000 0.934 0.524
## CAO60 0.00274 0.00512 0.53650 0.00000 1.75000 0.938 0.524
## PD12 0.00272 0.00549 0.49600 2.00000 0.00000 0.940 0.627
## CAO19 0.00268 0.00430 0.62330 2.25000 0.00000 0.943 0.554
## CAH09 0.00251 0.00332 0.75620 0.75000 2.00000 0.946 0.713
## PH74 0.00225 0.00264 0.85380 0.00000 3.50000 0.949 0.382
## CAH22 0.00219 0.00191 1.14800 0.00000 2.25000 0.951 0.177
## CAO25 0.00204 0.00412 0.49600 1.50000 0.00000 0.953 0.627
## PH27 0.00203 0.00214 0.94720 2.25000 0.00000 0.955 0.336
## PH47 0.00192 0.00230 0.83640 1.75000 0.00000 0.957 0.289
## CAO53 0.00182 0.00338 0.53910 0.00000 1.25000 0.959 0.571
## PH61 0.00169 0.00307 0.55160 0.00000 2.00000 0.961 0.639
## PD14 0.00165 0.00259 0.63850 1.25000 0.25000 0.963 0.623
## CAO43 0.00162 0.00173 0.93420 0.00000 2.25000 0.965 0.378
## CAO55 0.00157 0.00292 0.53650 0.00000 1.00000 0.966 0.524
## CAO33 0.00148 0.00268 0.55160 0.00000 1.75000 0.968 0.639
## CAH33 0.00146 0.00270 0.53910 0.00000 1.00000 0.970 0.571
## CAO07 0.00142 0.00113 1.25430 1.50000 0.00000 0.971 0.149
## CAH37 0.00142 0.00182 0.78150 0.00000 1.50000 0.973 0.380
## PD11 0.00136 0.00161 0.84240 1.25000 0.00000 0.974 0.293
## CAO41 0.00135 0.00151 0.89860 0.00000 2.00000 0.976 0.311
## CAO17 0.00134 0.00208 0.64070 1.50000 0.00000 0.977 0.566
## PH53 0.00118 0.00156 0.75560 1.25000 0.00000 0.978 0.545
## P14 0.00110 0.00139 0.79480 1.00000 0.00000 0.979 0.312
## PH21 0.00108 0.00084 1.28990 1.00000 0.00000 0.981 0.109
## CAO34 0.00106 0.00192 0.55160 0.00000 1.25000 0.982 0.639
## PD10 0.00097 0.00107 0.90400 1.00000 0.25000 0.983 0.729
## CAH21 0.00090 0.00097 0.92690 0.00000 1.25000 0.984 0.366
## PH48 0.00089 0.00084 1.06180 1.00000 0.25000 0.985 0.456
## CAO10 0.00083 0.00074 1.11720 1.00000 0.25000 0.986 0.768
## PH45 0.00068 0.00130 0.51980 0.75000 0.00000 0.986 0.705
## CAO48 0.00066 0.00080 0.82020 0.50000 0.00000 0.987 0.258
## CAO31 0.00063 0.00115 0.55160 0.00000 0.75000 0.988 0.639
## CAH35 0.00063 0.00114 0.55620 0.00000 1.25000 0.988 0.723
## CAO01 0.00063 0.00121 0.52310 0.75000 0.00000 0.989 0.739
## CAO04 0.00063 0.00121 0.52310 0.75000 0.00000 0.990 0.739
## PH33 0.00055 0.00069 0.79480 0.50000 0.00000 0.990 0.312
## CAO47 0.00054 0.00066 0.81930 0.50000 0.00000 0.991 0.276
## PH66 0.00051 0.00091 0.55620 0.00000 1.00000 0.992 0.723
## PH34 0.00044 0.00050 0.87400 0.50000 0.00000 0.992 0.292
## CAO35 0.00042 0.00077 0.55160 0.00000 0.50000 0.992 0.639
## CAO37 0.00042 0.00077 0.55160 0.00000 0.50000 0.993 0.639
## CAO38 0.00042 0.00077 0.55160 0.00000 0.50000 0.993 0.639
## PH63 0.00042 0.00077 0.55160 0.00000 0.50000 0.994 0.639
## CAO54 0.00039 0.00073 0.53650 0.00000 0.25000 0.994 0.524
## CAO61 0.00039 0.00073 0.53650 0.00000 0.25000 0.995 0.524
## CAH27 0.00036 0.00068 0.53910 0.00000 0.25000 0.995 0.571
## CAH34 0.00036 0.00068 0.53910 0.00000 0.25000 0.995 0.571
## PH73 0.00036 0.00068 0.53910 0.00000 0.25000 0.996 0.571
## CAO27 0.00034 0.00069 0.49600 0.25000 0.00000 0.996 0.627
## PH78 0.00034 0.00069 0.49600 0.25000 0.00000 0.997 0.627
## CAO49 0.00032 0.00064 0.50010 0.25000 0.00000 0.997 0.671
## P10 0.00032 0.00064 0.50010 0.25000 0.00000 0.997 0.671
## CAH36 0.00025 0.00046 0.55620 0.00000 0.50000 0.998 0.723
## CAO20 0.00023 0.00043 0.51980 0.25000 0.00000 0.998 0.705
## CAO22 0.00023 0.00043 0.51980 0.25000 0.00000 0.998 0.705
## PH35 0.00023 0.00043 0.51980 0.25000 0.00000 0.998 0.705
## PH49 0.00023 0.00043 0.51980 0.25000 0.00000 0.999 0.705
## CAO36 0.00021 0.00038 0.55160 0.00000 0.25000 0.999 0.639
## CAO13 0.00021 0.00040 0.52310 0.25000 0.00000 0.999 0.739
## CAO14 0.00021 0.00040 0.52310 0.25000 0.00000 0.999 0.739
## CAO16 0.00021 0.00040 0.52310 0.25000 0.00000 0.999 0.739
## P13 0.00021 0.00040 0.52310 0.25000 0.00000 1.000 0.739
## PH82 0.00021 0.00040 0.52310 0.25000 0.00000 1.000 0.959
## PH64 0.00013 0.00023 0.55620 0.00000 0.25000 1.000 0.723
## CAH23 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO45 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH69 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH70 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Contrast: deep_cluster2
##
## average sd ratio ava avb cumsum p
## CAO39 0.14351 0.28450 0.50440 0.00000 225.00000 0.146 0.119
## P07 0.12397 0.17549 0.70640 0.00000 90.50000 0.273 0.118
## CAO28 0.09476 0.04262 2.22350 0.00000 71.25000 0.370 0.032 *
## PH13 0.08343 0.04003 2.08410 1.00000 54.50000 0.455 0.001 ***
## PD17 0.07255 0.06390 1.13530 0.00000 54.50000 0.529 0.118
## CAO29 0.05425 0.05377 1.00900 0.00000 31.75000 0.584 0.069 .
## PH69 0.05287 0.03572 1.48000 24.00000 0.00000 0.638 0.054 .
## CAH18 0.02927 0.04462 0.65610 0.00000 14.75000 0.668 0.108
## PD22 0.02900 0.04794 0.60480 3.00000 7.50000 0.698 0.161
## PH10 0.02529 0.01772 1.42750 13.00000 1.25000 0.724 0.507
## PH55 0.01525 0.01762 0.86560 0.00000 14.25000 0.739 0.191
## CA01 0.01480 0.02960 0.50000 0.00000 11.75000 0.754 0.118
## CAH20 0.01440 0.01923 0.74900 0.00000 4.25000 0.769 0.086 .
## CAO45 0.01322 0.00893 1.48000 6.00000 0.00000 0.783 0.054 .
## CAO42 0.01322 0.02518 0.52490 0.00000 4.75000 0.796 0.113
## PH77 0.01306 0.02612 0.50000 0.00000 3.50000 0.809 0.123
## CAH04 0.01230 0.00843 1.45900 1.00000 7.50000 0.822 0.376
## CAO30 0.01071 0.02141 0.50000 0.00000 8.50000 0.833 0.118
## PH07 0.00999 0.01601 0.62370 0.00000 2.75000 0.843 0.452
## CAH15 0.00945 0.01889 0.50000 0.00000 7.50000 0.853 0.118
## CAH08 0.00884 0.00682 1.29640 0.00000 7.00000 0.862 0.120
## PH75 0.00840 0.01679 0.50000 0.00000 2.25000 0.870 0.354
## P08 0.00764 0.01402 0.54470 0.00000 2.75000 0.878 0.113
## PH58 0.00675 0.00607 1.11230 0.00000 4.50000 0.885 0.073 .
## CAH09 0.00655 0.01065 0.61540 0.00000 2.00000 0.892 0.136
## CAH30 0.00655 0.01071 0.61160 0.00000 3.00000 0.898 0.616
## CAO56 0.00653 0.01306 0.50000 0.00000 1.75000 0.905 0.123
## CAO60 0.00653 0.01306 0.50000 0.00000 1.75000 0.912 0.123
## PH52 0.00546 0.00892 0.61180 0.00000 1.50000 0.917 0.450
## PH32 0.00519 0.00603 0.86070 0.00000 1.50000 0.923 0.404
## CAO02 0.00472 0.00945 0.50000 0.00000 3.75000 0.927 0.888
## CAH23 0.00441 0.00298 1.48000 2.00000 0.00000 0.932 0.054 .
## PH70 0.00441 0.00298 1.48000 2.00000 0.00000 0.936 0.054 .
## PH59 0.00409 0.00819 0.50000 0.00000 3.25000 0.941 0.153
## PH71 0.00409 0.00345 1.18390 0.00000 1.50000 0.945 0.381
## CAO53 0.00398 0.00796 0.50000 0.00000 1.25000 0.949 0.113
## CAH22 0.00397 0.00430 0.92320 0.00000 2.25000 0.953 0.083 .
## CAO55 0.00373 0.00746 0.50000 0.00000 1.00000 0.957 0.123
## PH74 0.00350 0.00407 0.85910 0.00000 3.50000 0.960 0.194
## CAH33 0.00318 0.00637 0.50000 0.00000 1.00000 0.964 0.113
## PD08 0.00298 0.00326 0.91180 0.00000 1.00000 0.967 0.984
## CAO43 0.00270 0.00322 0.84000 0.00000 2.25000 0.969 0.221
## PH61 0.00252 0.00504 0.50000 0.00000 2.00000 0.972 0.118
## CAH37 0.00237 0.00302 0.78550 0.00000 1.50000 0.974 0.138
## CAO33 0.00220 0.00441 0.50000 0.00000 1.75000 0.977 0.118
## PH82 0.00220 0.00149 1.48000 1.00000 0.00000 0.979 0.032 *
## CAO41 0.00190 0.00241 0.78510 0.00000 2.00000 0.981 0.180
## PH15 0.00187 0.00373 0.50000 0.00000 0.50000 0.983 0.639
## CAO34 0.00157 0.00315 0.50000 0.00000 1.25000 0.984 0.118
## CAH21 0.00157 0.00188 0.83650 0.00000 1.25000 0.986 0.207
## CAO31 0.00094 0.00189 0.50000 0.00000 0.75000 0.987 0.118
## CAO10 0.00093 0.00187 0.50000 0.00000 0.25000 0.988 0.479
## PH24 0.00093 0.00187 0.50000 0.00000 0.25000 0.989 0.927
## CAO54 0.00093 0.00187 0.50000 0.00000 0.25000 0.990 0.123
## CAO61 0.00093 0.00187 0.50000 0.00000 0.25000 0.991 0.123
## PD10 0.00080 0.00159 0.50000 0.00000 0.25000 0.991 0.579
## CAH27 0.00080 0.00159 0.50000 0.00000 0.25000 0.992 0.113
## CAH34 0.00080 0.00159 0.50000 0.00000 0.25000 0.993 0.113
## PH73 0.00080 0.00159 0.50000 0.00000 0.25000 0.994 0.113
## CAH35 0.00079 0.00159 0.50000 0.00000 1.25000 0.995 0.119
## PH66 0.00064 0.00127 0.50000 0.00000 1.00000 0.995 0.119
## CAO35 0.00063 0.00126 0.50000 0.00000 0.50000 0.996 0.118
## CAO37 0.00063 0.00126 0.50000 0.00000 0.50000 0.997 0.118
## CAO38 0.00063 0.00126 0.50000 0.00000 0.50000 0.997 0.118
## PH63 0.00063 0.00126 0.50000 0.00000 0.50000 0.998 0.118
## PH40 0.00048 0.00095 0.50000 0.00000 0.75000 0.998 0.921
## CAH36 0.00032 0.00064 0.50000 0.00000 0.50000 0.999 0.119
## PH48 0.00031 0.00063 0.50000 0.00000 0.25000 0.999 0.886
## PD14 0.00031 0.00063 0.50000 0.00000 0.25000 0.999 0.774
## CAO36 0.00031 0.00063 0.50000 0.00000 0.25000 1.000 0.118
## PH43 0.00016 0.00032 0.50000 0.00000 0.25000 1.000 0.940
## PH64 0.00016 0.00032 0.50000 0.00000 0.25000 1.000 0.119
## CAO01 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO04 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO07 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO11 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO13 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO14 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO16 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO17 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO21 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO46 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## P13 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## P14 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PD03 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH21 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH27 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH33 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH34 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH39 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH51 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH53 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO05 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO19 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO20 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO22 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO47 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PD11 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH35 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH36 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH41 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH45 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH47 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH49 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO23 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO25 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO27 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO48 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PD12 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## PH78 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## CAO49 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## P10 0.00000 0.00000 NaN 0.00000 0.00000 1.000 NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Permutation: free
## Number of permutations: 999
comparisons <- c("cluster1_cluster2", "cluster2_deep", "cluster1_deep")
simper.results <- c()
for(i in 1:length(comparisons)) {
temp <- summary(simper_results)[as.character(comparisons[i])] %>%
as.data.frame()
colnames(temp) <- gsub(
paste(comparisons[i],".", sep = ""), "", colnames(temp))
temp <- temp %>%
mutate(Comparison = comparisons[i],
Position = row_number()) %>%
rownames_to_column(var = "Species")
simper.results <- rbind(simper.results, temp)
}
simper.results %>%
filter(p <= 0.05) %>%
select(Species, average, Comparison, Position)
## Species average Comparison Position
## 1 CAO11 0.006815392 cluster1_cluster2 27
## 2 PD08 0.131214067 cluster1_deep 1
## 3 CAH04 0.030071020 cluster1_deep 10
## 4 PH07 0.025072437 cluster1_deep 11
## 5 CAH30 0.024802611 cluster1_deep 12
## 6 CAO11 0.013130034 cluster1_deep 19
## 7 PH21 0.002127526 cluster1_deep 45
p1 <- ggplot(data = simper.results,
aes(x = Position, y = cumsum)) +
geom_line(aes(colour = Comparison)) +
theme_bw()
p2 <- ggplot(data = simper.results,
aes(x = Position, y = average)) +
geom_line(aes(colour = Comparison)) +
theme_bw()
ggarrange(p1, p2, common.legend = TRUE)
#How to interpret: A straight diagonal line would indicate equal contributions by species. So in our case, it probably a few species that contribute the most. Also in p2 it gets obvious that over 50% of species are not contributing to the differences at all (the line is on 0).
# Filter species with p <= 0.05 and position <= 10 (top 10 significant contributors)
top_simper <- simper.results %>%
filter(p <= 0.05, Position <= 20)
# Check the filtered table
print(top_simper)
## Species average sd ratio ava avb cumsum p
## 1 PD08 0.13121407 0.110533613 1.1870965 64.50 0 0.1351082 0.049
## 2 CAH04 0.03007102 0.024694299 1.2177313 12.75 1 0.6577731 0.021
## 3 PH07 0.02507244 0.011721025 2.1390994 12.75 0 0.6835896 0.012
## 4 CAH30 0.02480261 0.026537368 0.9346297 9.00 0 0.7091283 0.047
## 5 CAO11 0.01313003 0.003851628 3.4089569 6.50 0 0.8250484 0.001
## Comparison Position
## 1 cluster1_deep 1
## 2 cluster1_deep 10
## 3 cluster1_deep 11
## 4 cluster1_deep 12
## 5 cluster1_deep 19
#Visualize the results from above:
# Add cluster to your long-format species data
species_df <- community_matrix %>%
as.data.frame() %>%
rownames_to_column("dive_name") %>%
left_join(cluster_assignments, by = "dive_name") %>%
pivot_longer(-c(dive_name, cluster), names_to = "species", values_to = "abundance")
# Keep only indicator species - taken from results above
indicators <- c("CAO11", "CAO21", "PD03", "CAO29", "PD17", "CAO28")
species_df_filtered <- species_df %>%
filter(species %in% indicators)
# Plot
ggplot(species_df_filtered, aes(x = species, y = abundance, fill = cluster)) +
geom_bar(stat = "summary", fun = "mean", position = "dodge") +
labs(title = "Indicator Species Abundance by Cluster", y = "Mean Abundance") +
theme_minimal()
# Is geography or depth main reason? West locations are also deeper than in east
# Partial dbRDA: test depth effect controlling for geography (lat, lon)
partial_dbRDA_depth <- capscale(bray.df ~ depth + Condition(lat + lon), data = env2.z)
anova(partial_dbRDA_depth) # test significance of depth after geography
## Permutation test for capscale under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = bray.df ~ depth + Condition(lat + lon), data = env2.z)
## Df SumOfSqs F Pr(>F)
## Model 1 0.42445 1.3698 0.207
## Residual 5 1.54928
# Partial dbRDA: test geography effect controlling for depth
partial_dbRDA_geo <- capscale(bray.df ~ lat + lon + Condition(depth), data = env2.z)
anova(partial_dbRDA_geo) # test significance of geography after depth
## Permutation test for capscale under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = bray.df ~ lat + lon + Condition(depth), data = env2.z)
## Df SumOfSqs F Pr(>F)
## Model 2 0.89615 1.4461 0.119
## Residual 5 1.54928
#Meaning:
#Depth effect (controlling for lat/lon) p = 0.23 (not significant) Depth alone does not explain a significant part of community variation after accounting for geography.
#Geography effect (controlling for depth) p = 0.125 (not significant) Geography (lat + lon) also does not explain a significant part after accounting for depth.
# Assuming env.data.z has depth, lat, lon
var_part <- varpart(bray.df, ~ depth, ~ lat + lon, data = env2.z)
plot(var_part)
print(var_part)
##
## Partition of squared Unknown user-supplied distance in dbRDA
##
## Call: varpart(Y = bray.df, X = ~depth, ~lat + lon, data = env2.z)
##
## Explanatory tables:
## X1: ~depth
## X2: ~lat + lon
##
## No. of explanatory tables: 2
## Total variation (SS): 3.1032
## No. of observations: 9
##
## Partition table:
## Df R.squared Adj.R.squared Testable
## [a+c] = X1 1 0.21195 0.09937 TRUE
## [b+c] = X2 2 0.36396 0.15195 TRUE
## [a+b+c] = X1+X2 3 0.50074 0.20118 TRUE
## Individual fractions
## [a] = X1|X2 1 0.04924 TRUE
## [b] = X2|X1 2 0.10181 TRUE
## [c] 0 0.05014 FALSE
## [d] = Residuals 0.79882 FALSE
## ---
## Use function 'dbrda' to test significance of fractions of interest
#Results:
#Depth alone (after accounting for geography): ~4.65%
#Geography alone (after accounting for depth): ~9.82%
#Shared effect of depth & geography (overlap): ~5.37%
#Residuals (unexplained variation): ~80.16%
# Test unique effect of depth (a)
depth_only <- dbrda(bray.df ~ depth + Condition(lat + lon), data = env2.z)
anova(depth_only)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = bray.df ~ depth + Condition(lat + lon), data = env2.z)
## Df SumOfSqs F Pr(>F)
## Model 1 0.42445 1.3698 0.207
## Residual 5 1.54928
# Test unique effect of geography (b)
geo_only <- dbrda(bray.df ~ lat + lon + Condition(depth), data = env2.z)
anova(geo_only)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = bray.df ~ lat + lon + Condition(depth), data = env2.z)
## Df SumOfSqs F Pr(>F)
## Model 2 0.89615 1.4461 0.114
## Residual 5 1.54928
# Test combined effect (a+b+c)
full_model <- dbrda(bray.df ~ depth + lat + lon, data = env2.z)
anova(full_model)
## Permutation test for dbrda under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: dbrda(formula = bray.df ~ depth + lat + lon, data = env2.z)
## Df SumOfSqs F Pr(>F)
## Model 3 1.5539 1.6716 0.012 *
## Residual 5 1.5493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pcoa.bray <- cmdscale(bray.df, k = 2, eig = T)
# print(pcoa.bray) #explore output if wanted
# extract axis positions for each site from cmdscale object and create a dataframe for plotting
pcoa.bray.plotting <- as.data.frame(pcoa.bray$points)
colnames(pcoa.bray.plotting) <- c("pcoa1", "pcoa2")
# Create alpha diversity dataframe, include info about environment and locations
pcoa.bray.plotting <- cbind(pcoa.bray.plotting, ENV2)
# calculate the proportion of variance in the data which is explained by the first two PCoA axes
pcoa.bray$eig[1]/(sum(pcoa.bray$eig))
## [1] 0.3028843
pcoa.bray$eig[2]/(sum(pcoa.bray$eig))
## [1] 0.1820574
pcoa.bray.plotting$site <- data.c$dive_name
#pcoa.bray.plotting$watermass <- data.c$watermass
ggplot(pcoa.bray.plotting, aes(x = pcoa1, y = pcoa2, label=site, color=site)) +
geom_point(size = 4) +
#scale_shape_manual(values = c(15, 16, 17, 18, 19))+
#ggtitle("PCA of Bray Curtis Dissimilarities") +
geom_text_repel(min.segment.length = 0, size=4, box.padding = 1) +
labs(col = "Site",
x = "PCoA1",
y = "PCoA2",
size = 5) +
theme_linedraw() +
# Confidence ellipses
# stat_ellipse(aes(col = current), level = 0.95, linetype = "dashed") +
theme(panel.grid = element_blank(),
legend.title = element_text(size = 10),
legend.text = element_text(size = 10),
axis.title.x = element_text (size = 14),
axis.title.y = element_text(size = 14),
legend.background = element_blank(),
legend.box.background= element_rect(colour="black"),
legend.position = "right")
## Warning: ggrepel: 5 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
# Test the significance of factors on the distances
# TEST: Adonis2/PERMANOVA. Multivariate test for distance matrix as bray curtis e.g.
variables <- c("depth", "medium","lon","lat")
results <- lapply(variables, function(var) adonis2(as.formula(paste("bray.df ~", var)), data = data.c))
results
## [[1]]
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(paste("bray.df ~", var)), data = data.c)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.65772 0.21195 1.8827 0.011 *
## Residual 7 2.44544 0.78805
## Total 8 3.10316 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [[2]]
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(paste("bray.df ~", var)), data = data.c)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.42972 0.13848 1.1252 0.34
## Residual 7 2.67344 0.86152
## Total 8 3.10316 1.00000
##
## [[3]]
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(paste("bray.df ~", var)), data = data.c)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.7630 0.24588 2.2823 0.007 **
## Residual 7 2.3401 0.75412
## Total 8 3.1032 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## [[4]]
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(paste("bray.df ~", var)), data = data.c)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3678 0.11852 0.9412 0.533
## Residual 7 2.7354 0.88148
## Total 8 3.1032 1.00000
# Extract key stats from each adonis2 result
results_summary <- lapply(results, function(res) {
data.frame(
R2 = res$R2[1],
F = res$F[1],
p_value = res$`Pr(>F)`[1]
)
}) %>%
bind_rows() %>%
mutate(variable = variables)
# Reorder columns for clarity
results_summary <- results_summary %>% select(variable, everything())
print(results_summary)
## variable R2 F p_value
## 1 depth 0.2119520 1.8827080 0.011
## 2 medium 0.1384785 1.1251599 0.340
## 3 lon 0.2458801 2.2823439 0.007
## 4 lat 0.1185239 0.9412249 0.533
Morphotype clustering
The cluster analysis is an exploratory data analysis, and can be used to group a set of data into clusters. First, I am exploring the species data (morphotype-level) to assess clustering patterns among different morphotypes per site. The dissimilarity between morphotypes is computed using different clustering methods to find the best fit. I am using the site grouped dataset (SPE2) because I want to see which sites share similar morphotype compositions.
# First: Species data clustering
# Used Borcard's suggestion to explore the species data
# Set seed for reproducibility
set.seed(123)
# Compute matrix of chord distance among morphotypes
spe.ch <- as.matrix(vegdist(SPE2.hel, "euc"))
rownames(spe.ch) <- rownames(SPE2.hel) # Ensure meaningful labels if available
colnames(spe.ch) <- rownames(SPE2.hel)
spe.ch <- as.dist(spe.ch)
# Compute different hierarchical clustering methods
spe.ch.single <- hclust(spe.ch, method = "single")
spe.ch.complete <- hclust(spe.ch, method = "complete")
spe.ch.UPGMA <- hclust(spe.ch, method = "average") # UPGMA (best cophenetic correlation)
spe.ch.ward <- hclust(spe.ch, method = "ward.D2")
# Plot dendrograms
par(mfrow = c(2,2)) # Arrange plots in a grid
plot(spe.ch.single, main = "Chord - Single linkage", cex = 0.8)
plot(spe.ch.complete, main = "Chord - Complete linkage", cex = 0.8)
plot(spe.ch.UPGMA, main = "Chord - UPGMA", cex = 0.8)
plot(spe.ch.ward, main = "Chord - Ward", cex = 0.8)
par(mfrow = c(1,1)) # Reset plot layout
# Compute cophenetic correlation coefficients to evaluate clustering methods
spe.ch.single.coph <- cophenetic(spe.ch.single)
spe.ch.comp.coph <- cophenetic(spe.ch.complete)
spe.ch.UPGMA.coph <- cophenetic(spe.ch.UPGMA)
spe.ch.ward.coph <- cophenetic(spe.ch.ward)
# Print correlation values
cor(spe.ch, spe.ch.single.coph) # Single Linkage
## [1] 0.8763973
cor(spe.ch, spe.ch.comp.coph) # Complete Linkage
## [1] 0.9231821
cor(spe.ch, spe.ch.UPGMA.coph) # UPGMA (expected highest)
## [1] 0.9313692
cor(spe.ch, spe.ch.ward.coph) # Ward's
## [1] 0.8705801
# Highest correlation found in the UPGMA clustering with 0.88
# Cluster validation using bootstrap resampling
spech.pv <- pvclust(t(SPE2.hel), method.hclust = "average", method.dist = "euc", parallel = FALSE)
## Bootstrap (r = 0.5)... Done.
## Bootstrap (r = 0.6)... Done.
## Bootstrap (r = 0.7)... Done.
## Bootstrap (r = 0.79)... Done.
## Bootstrap (r = 0.89)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.29)... Done.
## Bootstrap (r = 1.39)... Done.
plot(spech.pv)
# Highlight clusters with high AU p-values (≥0.95 = significant)
pvrect(spech.pv, alpha = 0.95, pv = "au")
# Alternative: Determine optimal number of clusters using silhouette method
# Define a range of k values to test
k_values <- 2:8
sil_widths <- numeric(length(k_values))
# Compute silhouette width for each k
for (i in seq_along(k_values)) {
cluster_assignments <- cutree(spe.ch.UPGMA, k = k_values[i]) # Get cluster labels
sil <- silhouette(cluster_assignments, spe.ch) # Compute silhouette scores
sil_widths[i] <- mean(sil[, 3]) # Store average silhouette width
}
# Plot silhouette width vs. number of clusters
plot(k_values, sil_widths, type = "b", pch = 19, frame = FALSE,
xlab = "Number of Clusters (k)", ylab = "Average Silhouette Width",
main = "Optimal Cluster Selection via Silhouette Width")
abline(v = k_values[which.max(sil_widths)], col = "red", lty = 2) # Mark optimal k
cluster_membership <- cutree(spe.ch.UPGMA, k = 3)
table(cluster_membership)
## cluster_membership
## 1 2 3
## 4 1 4
# Final reordered dendrogram with chosen clusters
# Convert to dendrogram and reorder it
dend <- as.dendrogram(spe.ch.UPGMA)
dend_reordered <- reorder(dend, spe.ch)
plot(dend_reordered, hang = 0, xlab = "Cluster Groups", ylab = "Height", main = "Chord - UPGMA (Reordered)")
## Warning in plot.window(...): "hang" is not a graphical parameter
## Warning in plot.xy(xy, type, ...): "hang" is not a graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "hang" is not a
## graphical parameter
## Warning in axis(side = side, at = at, labels = labels, ...): "hang" is not a
## graphical parameter
## Warning in title(...): "hang" is not a graphical parameter
rect.hclust(as.hclust(dend_reordered), k = 3) # add cluster boxes
#Rows are dive IDs -
Environmental clustering
Continuing with environmental data. Which sites share cluster together that share similar environmental conditions? Does this correspond to geography? How does it relate to the species clusters?
# Environmental data clustering analysis
# Ensure NA rows are removed for environmental data
# Compute the distance matrix using Euclidean distance
env.ch <-as.matrix(vegdist(na.omit(env2.z), "euc")) # Attach site names to object of class 'dist'
rownames(env.ch) <- c("03","04", "05" , "06", "07" ,"08", "09", "10", "11")
colnames(env.ch) <- c("03","04", "05" , "06", "07" ,"08", "09", "10", "11")
env.ch <- as.dist(env.ch)
# Compute agglomerative clustering methods
env.ch.single <- hclust(env.ch, method = "single")
env.ch.complete <- hclust(env.ch, method = "complete")
env.ch.UPGMA <- hclust(env.ch, method = "average") # UPGMA
env.ch.ward <- hclust(env.ch, method = "ward.D2")
# Plot dendrograms for different methods
par(mfrow = c(2, 2)) # Arrange plots in a grid
plot(env.ch.single, main = "Chord - Single linkage")
plot(env.ch.complete, main = "Chord - Complete linkage")
plot(env.ch.UPGMA, main = "Chord - UPGMA")
plot(env.ch.ward, main = "Chord - Ward")
par(mfrow = c(1, 1)) # Reset plot layout
# Compute cophenetic correlations to assess clustering method
env.ch.single.coph <- cophenetic(env.ch.single)
env.ch.comp.coph <- cophenetic(env.ch.complete)
env.ch.UPGMA.coph <- cophenetic(env.ch.UPGMA)
env.ch.ward.coph <- cophenetic(env.ch.ward)
# Print correlation values for each method
cor(env.ch, env.ch.single.coph) # Single Linkage
## [1] 0.876315
cor(env.ch, env.ch.comp.coph) # Complete Linkage
## [1] 0.878266
cor(env.ch, env.ch.UPGMA.coph) # UPGMA (highest correlation)
## [1] 0.8874341
cor(env.ch, env.ch.ward.coph) # Ward's - Actually shows the zonation of east/west...
## [1] 0.6232907
# Perform p-value bootstrapping to validate clusters (approximate unbiased p-values)
envch.pv <- pvclust(t(env2.z), method.hclust = "average", method.dist = "eucl", parallel = TRUE)
## Creating a temporary cluster...done:
## socket cluster with 13 nodes on host 'localhost'
## Multiscale bootstrap... Done.
plot(envch.pv)
pvrect(envch.pv, alpha = 0.95, pv = "au") # Highlight significant clusters (AU p-value ≥ 0.95)
# Reorder the dendrogram
env.chwo <- reorder(env.ch.UPGMA, env.ch)
# Plot the reordered dendrogram
plot(env.chwo, hang = 0, xlab = "3 groups", ylab = "Height", main = "Chord - UPGMA (Reordered)")
rect.hclust(env.chwo, k = 3) # Highlight 3 clusters
# PCA on Hellinger-transformed species data (SPE.hel)
spe.pca <- prcomp(SPE.hel, scale = TRUE) # You can scale if needed
# PCA plot for species data
biplot(spe.pca, main = "PCA of Species Data") # You can adjust the biplot options to visualize axes and vectors better
#screeplot(spe.pca)
# PCA on Environmental Data (env.z standardized)
env.pca <- prcomp(env.z, scale = TRUE) # exclude the previously added site and water mass and feature columns (not needed here anyways)
biplot(env.pca, main = "PCA of Environmental Data") # Similar to species PCA, biplot shows the principal components
#screeplot(env.pca)
#Different approach
pca_result <- PCA(bray.df, graph = FALSE)
# Create a data frame for plotting
pca_data <- data.frame(pca_result$ind$coord)
pca_data$dive_name <- rownames(pca_data)
# Create diversity dataframe, include info about environmnent and locations
pca_data <- cbind(pca_data, env2.z)
ggplot(pca_data, aes(x = Dim.1, y = Dim.2, color = depth)) +
geom_point(size = 3) +
scale_shape_manual(values = c(15, 16, 17, 18, 19, 20, 21, 22, 23))+
ggtitle("PCA of Morphotype Diversity by Location") +
theme_minimal() +
theme(legend.position = "right")
> Procustes Analysis
Procrustes analysis tests whether the species composition (PCA on species data) and environmental variables (PCA on environmental data) are significantly correlated. If they are, it suggests that the species distributions are likely influenced by the environmental gradients in your dataset. A significant correlation means that the patterns in species composition are in some way “shaped” by the environmental factors, making CCA a suitable next step.
# 2. Perform Procrustes Analysis
#Test significance
env.Procrustes.sign <- protest(env.pca, spe.pca, scores="sites")
env.Procrustes.sign
##
## Call:
## protest(X = env.pca, Y = spe.pca, scores = "sites")
##
## Procrustes Sum of Squares (m12 squared): 0.9825
## Correlation in a symmetric Procrustes rotation: 0.1321
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
Procrustes shows no high correlation between species and env data. R = 0.13, but slighlty. So we can proceed with NMDS, RDA - but there are probably other factors at play that explain the the clustering better than the used environmnetal characters….But since we see the clustering in the ordination analysis, this still gives a good hint.
##NMDS
# 2. Run NMDS (k = 2 dimensions)
set.seed(123)
# FOR NOW USING DF1 - which has abundances NOT 0/1!!
nmds <- metaMDS(bray.df, k = 2, trymax = 100)
## Run 0 stress 0.04144766
## Run 1 stress 0.04596417
## Run 2 stress 0.0459641
## Run 3 stress 0.04596423
## Run 4 stress 0.04596412
## Run 5 stress 0.1868629
## Run 6 stress 0.0554878
## Run 7 stress 0.1868629
## Run 8 stress 0.0585558
## Run 9 stress 0.04144766
## ... Procrustes: rmse 1.054745e-05 max resid 1.640978e-05
## ... Similar to previous best
## Run 10 stress 0.04596421
## Run 11 stress 0.04144766
## ... Procrustes: rmse 1.008454e-05 max resid 1.592314e-05
## ... Similar to previous best
## Run 12 stress 0.04596426
## Run 13 stress 0.05855594
## Run 14 stress 0.04144766
## ... New best solution
## ... Procrustes: rmse 4.737705e-06 max resid 7.952027e-06
## ... Similar to previous best
## Run 15 stress 0.05548772
## Run 16 stress 0.04144766
## ... Procrustes: rmse 1.314609e-05 max resid 1.893913e-05
## ... Similar to previous best
## Run 17 stress 0.0459641
## Run 18 stress 0.0459642
## Run 19 stress 0.04596409
## Run 20 stress 0.04144767
## ... Procrustes: rmse 2.285186e-05 max resid 3.633656e-05
## ... Similar to previous best
## *** Best solution repeated 3 times
# 3. Extract NMDS site‐scores
nmds_scores <- as.data.frame(scores(nmds, display = "sites"))
nmds_scores$dive_name <- rownames(nmds_scores)
# Prepare NMDS site coordinates matrix
nmds_mat <- nmds_scores %>%
select(NMDS1, NMDS2) %>%
as.matrix()
# Run k-means
set.seed(123)
km_nmds <- kmeans(nmds_mat, centers = 3, nstart = 25)
cluster_assignments <- data.frame(
dive_name = rownames(nmds_scores),
cluster = factor(km_nmds$cluster)
)
nmds_df <- nmds_scores %>%
left_join(cluster_assignments, by = "dive_name")
# 5. Plot NMDS
ggplot(nmds_df, aes(x = NMDS1, y = NMDS2, color = cluster, fill = cluster)) +
geom_point(size = 3) +
geom_text_repel(aes(label = dive_name), size = 3, box.padding = 0.4) +
labs(
title = "NMDS of Coral/Sponge Community (Bray–Curtis)",
subtitle = paste0("Stress = ", round(nmds$stress, 3))
) +
theme_minimal(base_size = 14) +
theme(legend.position = "bottom")
Next: Distance-Based Redundancy Analysis (dbRDA) - dbRDA can be used for non euclidian distances, such as Bray curtis
#dbRDA
# 1) Construct full model if needed, but now I am only interested in depth/lon/lat anyways...
# 2) Run dbRDA (capscale) constraining on depth + lat + lon
dbRDA.mod <- capscale(bray.df ~ depth + lat + lon, data = env2.z)
# Check VIFs
vif_vals <- vif.cca(dbRDA.mod)
print(vif_vals) # make sure no VIF is > 5 (or other chosen threshold)
## depth lat lon
## 2.856744 1.327339 2.738161
# Permutation test for overall model (optional)
anova_all <- anova(dbRDA.mod, permutations = 999)
anova_terms <- anova(dbRDA.mod, by = "terms", permutations = 999)
print(anova_all)
## Permutation test for capscale under reduced model
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = bray.df ~ depth + lat + lon, data = env2.z)
## Df SumOfSqs F Pr(>F)
## Model 3 1.5539 1.6716 0.006 **
## Residual 5 1.5493
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
print(anova_terms)
## Permutation test for capscale under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
##
## Model: capscale(formula = bray.df ~ depth + lat + lon, data = env2.z)
## Df SumOfSqs F Pr(>F)
## depth 1 0.65772 2.1227 0.008 **
## lat 1 0.37118 1.1979 0.258
## lon 1 0.52498 1.6943 0.058 .
## Residual 5 1.54928
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
smry <- summary(dbRDA.mod)
smry
##
## Call:
## capscale(formula = bray.df ~ depth + lat + lon, data = env2.z)
##
## Partitioning of squared Unknown distance:
## Inertia Proportion
## Total 3.103 1.0000
## Constrained 1.554 0.5007
## Unconstrained 1.549 0.4993
##
## Eigenvalues, and their contribution to the squared Unknown distance
##
## Importance of components:
## CAP1 CAP2 CAP3 MDS1 MDS2 MDS3 MDS4
## Eigenvalue 0.7650 0.4860 0.30293 0.6061 0.3257 0.2836 0.21269
## Proportion Explained 0.2465 0.1566 0.09762 0.1953 0.1050 0.0914 0.06854
## Cumulative Proportion 0.2465 0.4031 0.50074 0.6961 0.8010 0.8924 0.96096
## MDS5
## Eigenvalue 0.12115
## Proportion Explained 0.03904
## Cumulative Proportion 1.00000
##
## Accumulated constrained eigenvalues
## Importance of components:
## CAP1 CAP2 CAP3
## Eigenvalue 0.7650 0.4860 0.3029
## Proportion Explained 0.4923 0.3127 0.1950
## Cumulative Proportion 0.4923 0.8050 1.0000
plot(dbRDA.mod)
# Extract site scores (CAP axes) and biplot scores for arrows
# Site scores (samples/dives) for CAP1 & CAP2
site_scores <- as.data.frame(smry$sites[, 1:2])
colnames(site_scores) <- c("CAP1", "CAP2")
site_scores$dive_name <- rownames(site_scores)
# Biplot (environmental vector) scores for arrow plotting
biplot_scores <- as.data.frame(smry$biplot[, 1:2])
colnames(biplot_scores) <- c("CAP1", "CAP2")
biplot_scores$variable <- rownames(biplot_scores)
# Plot the dbRDA in ggplot2
# Pull % variance explained for axis labels
# Extract proportion of variance explained by CAP1 and CAP2
rda_var <- summary(dbRDA.mod)$cont$importance["Proportion Explained", ]
rda1_pct <- round(rda_var[1] * 100, 2) # dbRDA1
rda2_pct <- round(rda_var[2] * 100, 2) # dbRDA2
# A. Extract the coordinates for clustering
scores_mat <- site_scores %>%
select(CAP1, CAP2) %>%
as.matrix()
# B. Run k-means clustering (adjust centers as needed)
set.seed(123) # for reproducibility
km <- kmeans(scores_mat, centers = 3, nstart = 25)
# Convert to named cluster labels
site_scores <- site_scores %>%
mutate(cluster = factor(km$cluster,
labels = c("deep", "cluster1", "cluster2")))
ggplot(site_scores, aes(x = CAP1, y = CAP2, color = cluster, fill = cluster)) +
# (a) Make hulls by cluster
geom_mark_hull(
data = site_scores %>% group_by(cluster) %>% filter(n() > 1) %>% ungroup(),
aes(x = CAP1, y = CAP2, group = cluster, fill = cluster),
alpha = 0.1,
expand = unit(2, "mm"),
show.legend = FALSE
) +
# (b) Biplot arrows (disable inheriting color/cluster from the global aes)
geom_segment(
data = biplot_scores,
aes(x = 0, xend = CAP1, y = 0, yend = CAP2),
inherit.aes = FALSE,
arrow = arrow(length = unit(0.015, "npc")),
color = "grey50"
) +
# (c) Arrow labels (again, no cluster mapping here)
geom_text_repel(
data = biplot_scores,
aes(x = CAP1, y = CAP2, label = variable),
inherit.aes = FALSE,
color = "grey50",
size = 3,
box.padding = 0.3
) +
# (d) Points for each dive (inherits color = cluster)
geom_point(size = 3) +
# (e) Dive labels (inherits nothing extra, just label from site_scores)
geom_text_repel(
aes(label = dive_name),
size = 3,
min.segment.length = 0,
box.padding = 0.4,
show.legend = FALSE
) +
# (f) Zero lines (no cluster mapping)
geom_hline(yintercept = 0, linetype = "dotted", inherit.aes = FALSE) +
geom_vline(xintercept = 0, linetype = "dotted", inherit.aes = FALSE) +
# (g) Axis limits and fixed ratio
coord_fixed(ratio = 1) +
xlim(-2, 2) +
ylim(-2, 2) +
# (h) Axis labels (add your % variance)
xlab(paste0("dbRDA1 (", rda1_pct, "%)")) +
ylab(paste0("dbRDA2 (", rda2_pct, "%)"))+
# (i) Color/fill scales for cluster
scale_color_manual(
values = c("cluster1" = "#1B9E77",
"cluster2" = "#D95F02",
"deep" = "#7570B3"),
name = "Cluster"
) +
scale_fill_manual(
values = c("cluster1" = "#1B9E77",
"cluster2" = "#D95F02",
"deep" = "#7570B3"),
name = "Cluster"
) +
# (j) Theme adjustments
theme_bw(base_size = 14) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "grey90"),
panel.grid.major.y = element_blank()
)
## Warning in geom_hline(yintercept = 0, linetype = "dotted", inherit.aes =
## FALSE): Ignoring unknown parameters: `inherit.aes`
## Warning in geom_vline(xintercept = 0, linetype = "dotted", inherit.aes =
## FALSE): Ignoring unknown parameters: `inherit.aes`
I put this in the same df as the length data, but as sheets. Now I saved each individual sheet as a file, to be able to work with it. All files are availabe from the Supplementary File (just save each sheet separately)
# Read all garden files
# You may need to adapt the path
file_paths <- c(
"./garden_densities/D04_GA.xlsx",
"./garden_densities/D04_GB.xlsx",
"./garden_densities/D07_GA.xlsx",
"./garden_densities/D08_GA.xlsx",
"./garden_densities/D08_GB.xlsx",
"./garden_densities/D11_GA.xlsx"
)
# Read each file into a list of data frames
list_of_dfs <- lapply(file_paths, read_excel)
# Optionally add a garden_id column from the filename for each df
garden_ids <- c("D04_GA", "D04_GB", "D07_GA", "D08_GA", "D08_GB", "D11_GA")
list_of_dfs <- Map(function(df, id) {
df$garden_id <- id
return(df)
}, list_of_dfs, garden_ids)
processed_list <- lapply(list_of_dfs, function(df) {
df %>%
# Summarize counts per frame_no and morphotype (in case of duplicates)
group_by(garden_id, frame_no, frame_time, area_square, morphotype) %>%
summarise(count = sum(count, na.rm = TRUE), .groups = "drop") %>%
# Pivot morphotypes wider as columns
pivot_wider(names_from = morphotype, values_from = count, values_fill = 0) %>%
# Calculate total individuals
mutate(total_ind = rowSums(across(where(is.numeric) & !c(area_square)), na.rm = TRUE)) %>%
# Calculate densities
mutate(
density_per_m_squared = total_ind / area_square,
density_per_10m_squared = density_per_m_squared * 10
) %>%
# Optional: add frame_id
mutate(frame_id = paste(garden_id, frame_no, sep = "_")) %>%
select(garden_id, frame_id, frame_no, frame_time, area_square, everything())
})
# Function to calculate average density per morphotype
summarize_density_df <- function(df) {
df %>%
group_by(garden_id) %>%
summarise(across(starts_with("P") | starts_with("C"), ~ mean(.x / area_square, na.rm = TRUE))) %>%
pivot_longer(cols = -garden_id, names_to = "Morphotype", values_to = "density")
}
# Apply function to all garden data
gardens_df <- bind_rows(lapply(processed_list, summarize_density_df), .id = "df_id")
# Join with taxa data (assumed to be available)
gardens_df <- gardens_df %>%
left_join(taxa, by = "Morphotype")
# Plot function to simplify repeated plot code
plot_density <- function(data, group_col, fill_col, title) {
data %>%
group_by(garden_id, !!sym(group_col)) %>%
summarise(total_density = sum(density)) %>%
mutate(prop_density = total_density / sum(total_density)) %>%
ggplot(aes(x = garden_id, y = total_density, fill = !!sym(group_col))) +
geom_bar(stat = "identity", aes(weight = prop_density), position = "stack") +
labs(x = "Garden ID", y = "Individuals per m^2", title = title) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
}
# Plot densities by Phylum, Morphotype, and Family
plot_density(gardens_df, "Phylum", "Phylum", "Densities by Phylum")
## `summarise()` has grouped output by 'garden_id'. You can override using the
## `.groups` argument.
## Warning in geom_bar(stat = "identity", aes(weight = prop_density), position =
## "stack"): Ignoring unknown aesthetics: weight
plot_density(gardens_df, "Morphotype", "Morphotype", "Densities by Morphotype")
## `summarise()` has grouped output by 'garden_id'. You can override using the
## `.groups` argument.
## Warning in geom_bar(stat = "identity", aes(weight = prop_density), position =
## "stack"): Ignoring unknown aesthetics: weight
plot_density(gardens_df, "Family", "Family", "Densities by Family")
## `summarise()` has grouped output by 'garden_id'. You can override using the
## `.groups` argument.
## Warning in geom_bar(stat = "identity", aes(weight = prop_density), position =
## "stack"): Ignoring unknown aesthetics: weight
# Calculate sponge-dominated gardens
sponge_dominated <- gardens_df %>%
group_by(garden_id, Phylum) %>%
summarise(total_density = sum(density, na.rm = TRUE)) %>%
group_by(garden_id) %>%
mutate(proportion = total_density / sum(total_density, na.rm = TRUE)) %>%
filter(Phylum == "Porifera") %>%
mutate(is_sponge_dominated = proportion >= 0.7)
## `summarise()` has grouped output by 'garden_id'. You can override using the
## `.groups` argument.
# Output results
kable(sponge_dominated) %>% kable_styling(font_size = 7, latex_options = c("striped"))
| garden_id | Phylum | total_density | proportion | is_sponge_dominated |
|---|---|---|---|---|
| D04_GA | Porifera | 1.3653468 | 0.9724049 | TRUE |
| D04_GB | Porifera | 2.0892589 | 0.9146288 | TRUE |
| D07_GA | Porifera | 1.0420924 | 0.7035118 | TRUE |
| D08_GA | Porifera | 0.2269549 | 0.2591885 | FALSE |
| D08_GB | Porifera | 2.5684879 | 0.9580928 | TRUE |
| D11_GA | Porifera | 0.2081262 | 0.0521961 | FALSE |
# Calculate coral-dominated gardens
coral_dominated <- gardens_df %>%
group_by(garden_id, Phylum) %>%
summarise(total_density = sum(density, na.rm = TRUE)) %>%
group_by(garden_id) %>%
mutate(proportion = total_density / sum(total_density, na.rm = TRUE)) %>%
filter(Phylum == "Cnidaria") %>%
mutate(is_coral_dominated = proportion >= 0.7)
## `summarise()` has grouped output by 'garden_id'. You can override using the
## `.groups` argument.
# Output results
kable(coral_dominated) %>% kable_styling(font_size = 7, latex_options = c("striped"))
| garden_id | Phylum | total_density | proportion | is_coral_dominated |
|---|---|---|---|---|
| D04_GA | Cnidaria | 0.0387461 | 0.0275951 | FALSE |
| D04_GB | Cnidaria | 0.1950108 | 0.0853712 | FALSE |
| D07_GA | Cnidaria | 0.4391798 | 0.2964882 | FALSE |
| D08_GA | Cnidaria | 0.6486816 | 0.7408115 | TRUE |
| D08_GB | Cnidaria | 0.1123462 | 0.0419072 | FALSE |
| D11_GA | Cnidaria | 3.7792646 | 0.9478039 | TRUE |
Now do some analysis and plotting of forest data. Bubble plots to show local densitites and dive tracks to get better overview.
# Step 1: Combine all processed garden frame tables
garden_long <- processed_list %>%
bind_rows() %>%
pivot_longer(
cols = -c(garden_id, frame_id, frame_no, frame_time, area_square, total_ind, density_per_m_squared, density_per_10m_squared),
names_to = "morphotype",
values_to = "count"
) %>%
filter(count > 0)
garden_long <- garden_long %>%
mutate(
expedition = "EX2104",
dive_number = stringr::str_extract(garden_id, "\\d+"),
garden = stringr::str_remove(garden_id, "D0*\\d+_"), # GA/GB becomes garden
garden = paste0(dive_number, stringr::str_replace(garden, "G", "")), # 4A, 4B...
frame_no = stringr::str_extract(frame_id, "\\d+$")
)
# Ensure datetime format
garden_long$frame_time <- as.POSIXct(garden_long$frame_time)
data.a$time <- as.POSIXct(data.a$time)
garden_long <- garden_long %>%
rowwise() %>%
mutate(
closest_row = list(
data.a %>%
mutate(time_diff = abs(as.numeric(difftime(time, frame_time, units = "secs")))) %>%
filter(time_diff == min(time_diff, na.rm = TRUE)) %>%
slice(1)
)
) %>%
unnest(cols = c(closest_row), names_sep = ".matched_") %>%
ungroup()
# Identify all current column names
current_names <- names(garden_long)
# Find columns with the prefix
matched_cols <- grep("^closest_row\\.matched_", current_names, value = TRUE)
# Compute the cleaned names
cleaned_names <- gsub("^closest_row\\.matched_", "", matched_cols)
# Keep only those where the cleaned name does NOT already exist
safe_renames <- matched_cols[!cleaned_names %in% current_names]
# Apply safe renaming
garden_long <- garden_long %>%
rename_with(
.cols = all_of(safe_renames),
.fn = ~ gsub("^closest_row\\.matched_", "", .x)
)
garden_long <- garden_long %>%
mutate(
garden = case_when(
grepl("^08", garden) ~ garden, # Keep all "08" gardens separate
TRUE ~ sub("([0-9]{2})[AB]$", "\\1", garden) # Remove trailing A or B from others like "04A" -> "04"
)
)
garden_summary <- garden_long %>%
group_by(garden_id) %>%
summarise(
n_frames = n_distinct(frame_id),
mean_density = mean(density_per_m_squared),
sd_density = sd(density_per_m_squared),
total_individuals = sum(count),
richness = n_distinct(morphotype)
)
garden_summary
## # A tibble: 6 × 6
## garden_id n_frames mean_density sd_density total_individuals richness
## <chr> <int> <dbl> <dbl> <dbl> <int>
## 1 D04_GA 8 2.05 0.485 111 14
## 2 D04_GB 7 3.46 2.07 133 16
## 3 D07_GA 8 2.29 0.897 115 9
## 4 D08_GA 8 1.66 0.751 64 10
## 5 D08_GB 10 3.30 1.27 239 7
## 6 D11_GA 10 4.90 1.84 361 11
garden_frame_density <- garden_long %>%
distinct(garden_id, frame_id, density_per_m_squared)
# Kruskal-Wallis test (non-parametric ANOVA)
kruskal.test(density_per_m_squared ~ garden_id, data = garden_frame_density)
##
## Kruskal-Wallis rank sum test
##
## data: density_per_m_squared by garden_id
## Kruskal-Wallis chi-squared = 16.458, df = 5, p-value = 0.005652
ggplot(garden_frame_density, aes(x = garden_id, y = density_per_m_squared)) +
geom_boxplot() +
theme_minimal() +
ylab("Density (ind/m²)")
## Forest vs non-forest
# I can't compare non-garden vs garden densities because of lacking area data for non garden.
# BUT I can look at the diversity (are gardens more diverse?) OR are the compositions inside the garden different?
#1) DIVERSITY Garden vs non-garden
# OBS: There will be a bias, because gardens have much less frames compared to non gardens obviously.
# Step 1: Sum counts per morphotype for each garden
garden_community <- garden_long %>%
group_by(garden_id, morphotype) %>%
summarise(count = sum(count), .groups = "drop") %>%
pivot_wider(names_from = morphotype, values_from = count, values_fill = 0)
# Step 2: Save garden IDs
garden_ids <- garden_community$garden_id
# Step 3: Remove garden_id for diversity calculation
community_matrix <- garden_community %>% select(-garden_id)
# Step 4: Calculate Shannon diversity for each garden
H_garden <- diversity(community_matrix, index = "shannon")
# Step 5: Combine results into a tidy dataframe
shannon_garden <- data.frame(
garden_id = garden_ids,
H = H_garden
)
# Convert the named vector `H` to a dataframe
H_dive <- data.frame(
dive_name = names(H),
H = as.numeric(H),
source = "non-garden"
)
shannon_garden <- shannon_garden %>%
mutate(dive_name = case_when(
garden_id %in% c("D04_GA", "D04_GB") ~ "EX2104_Dive04",
garden_id == "D07_GA" ~ "EX2104_Dive07",
garden_id %in% c("D08_GA", "D08_GB") ~ "EX2104_Dive08",
garden_id == "D11_GA" ~ "EX2104_Dive11"
),
source = "garden")
H_combined <- bind_rows(
shannon_garden %>% select(dive_name, H, source),
H_dive
)
ggplot(H_combined, aes(x = source, y = H)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.1, size = 2, alpha = 0.6) +
theme_minimal() +
labs(y = "Shannon diversity (H)", x = "Zone")
wilcox.test(H ~ source, data = H_combined)
##
## Wilcoxon rank sum exact test
##
## data: H by source
## W = 6, p-value = 0.01199
## alternative hypothesis: true location shift is not equal to 0
H_combined <- H_combined %>%
mutate(
dive_name_clean = if_else(
source == "non-garden",
str_extract(dive_name, "^[^ ]+"), # Extract substring before first space
dive_name
)
)
# Filter only those dives with garden data - Before I included ALL dive locations, which is also interesting, but not necessarily relevant.
dives_with_garden <- unique(H_combined$dive_name_clean[H_combined$source == "garden"])
H_filtered <- H_combined %>%
filter(dive_name_clean %in% dives_with_garden)
# Summarize mean H per dive and source
H_wide <- H_filtered %>%
group_by(dive_name_clean, source) %>%
summarise(H_mean = mean(H), .groups = "drop") %>%
pivot_wider(names_from = source, values_from = H_mean) %>%
drop_na()
#Paired and unpaired test:
wilcox.test(H_wide$garden, H_wide$`non-garden`, paired = TRUE) # So within each dive, the H is not different significantly.
##
## Wilcoxon signed rank exact test
##
## data: H_wide$garden and H_wide$`non-garden`
## V = 0, p-value = 0.125
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(H_wide$garden, H_wide$`non-garden`, paired = FALSE) # But when looking at all the garden/non garden sites independently, then yes - the gardens are less diverse than the non gardens.
##
## Wilcoxon rank sum exact test
##
## data: H_wide$garden and H_wide$`non-garden`
## W = 1, p-value = 0.05714
## alternative hypothesis: true location shift is not equal to 0
plot_data <- H_filtered %>%
select(dive_name_clean, source, H) %>%
rename(Dive = dive_name_clean, Source = source, Shannon_H = H)
# Optional: order dives by mean Shannon H to make plot clearer
plot_data <- plot_data %>%
group_by(Dive) %>%
mutate(mean_H = mean(Shannon_H)) %>%
ungroup() %>%
arrange(mean_H) %>%
mutate(Dive = factor(Dive, levels = unique(Dive)))
# Plot paired points connected by lines
ggplot(plot_data, aes(x = Source, y = Shannon_H, group = Dive, color = Dive)) +
geom_point(size = 3) +
geom_line(alpha = 0.5) +
theme_minimal() +
labs(title = "Comparison of Shannon Diversity (H) between Garden and Non-Garden Areas",
x = "Area Type",
y = "Shannon Diversity Index (H)") +
theme(legend.position = "none")
> The next stats are not very relevant, because Permanova needs more
than 2 sites to compare, but I only have 1 garden by dive site (or 2).
So ignore.
#2) Compositions
# Test if the compositions are different between garden and non-garden areas.
# I will only use the selected garden dive locations, ignore the rest of the dive locations.
# 1. Prepare garden community matrix (example: counts per garden_id and morphotype)
garden_comm <- garden_long %>%
group_by(garden_id, morphotype) %>%
summarise(total_count = sum(count, na.rm = TRUE)) %>%
pivot_wider(names_from = morphotype, values_from = total_count, values_fill = 0) %>%
ungroup()
## `summarise()` has grouped output by 'garden_id'. You can override using the
## `.groups` argument.
# Make garden sample names rownames
garden_mat <- garden_comm %>%
column_to_rownames("garden_id")
# 2. Prepare non-garden community matrix
# Ensure same columns (species) in both
all_species <- union(colnames(garden_mat), colnames(data.c[,2:113]))
# Add missing species columns filled with zeros
add_missing_cols <- function(mat, species) {
missing <- setdiff(species, colnames(mat))
if(length(missing) > 0){
mat[, missing] <- 0
}
# Reorder columns
mat <- mat[, species]
return(mat)
}
garden_mat <- add_missing_cols(garden_mat, all_species)
# Check column names
all(colnames(SPE.pa) == colnames(garden_mat)) # should be TRUE
## [1] FALSE
# If FALSE, reorder columns in garden_mat to match SPE.pa
garden_mat <- garden_mat[, colnames(SPE.pa)]
SPE.pa_df <- as.data.frame.matrix(SPE.pa)
# Vector of full row names to remove
rows_to_remove <- c(
"EX2104_Dive03 Hopscotch Seamount",
"EX2104_Dive05 Rockaway Seamount",
"EX2104_Dive06 Castle Rock Seamount",
"EX2104_Dive09 Yakutat Shallow Seamount",
"EX2104_Dive10 Yakutat Deep Seamount"
)
# Subset SPE.pa_df excluding those rows
SPE.pa_df <- SPE.pa_df[!rownames(SPE.pa_df) %in% rows_to_remove, ]
# Check remaining rows
rownames(SPE.pa_df)
## [1] "EX2104_Dive04 Dumbbell Seamount"
## [2] "EX2104_Dive07 Corner Rise 1 Seamount"
## [3] "EX2104_Dive08 MacGregor Seamount"
## [4] "EX2104_Dive11 Caloosahatchee Seamount"
combined_mat <- rbind(garden_mat, SPE.pa_df)
# 4. Calculate Bray-Curtis dissimilarity
bray_combined <- vegdist(combined_mat, method = "bray")
# Step 1: Make sure your grouping vector matches the rows of the dissimilarity matrix
group_vector <- c(
rep("garden", 6), # first 6 rows = garden IDs
rep("non-garden", nrow(bray_combined) - 6)
)
# Step 2: Run PERMANOVA
adonis_result <- adonis2(as.dist(bray_combined) ~ group_vector)
print(adonis_result) #This compares ALL gardens to the non garden sites. No pairing here.
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.dist(bray_combined) ~ group_vector)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.3036 0.08649 0.7574 0.699
## Residual 8 3.2069 0.91351
## Total 9 3.5105 1.00000
# Now compare biological community dissimilarities between garden replicates and their corresponding non-garden dive sites
# List of garden-to-dive mappings
garden_to_dive <- list(
"D04" = "EX2104_Dive04 Dumbbell Seamount",
"D07" = "EX2104_Dive07 Corner Rise 1 Seamount",
"D08" = "EX2104_Dive08 MacGregor Seamount",
"D11" = "EX2104_Dive11 Caloosahatchee Seamount"
)
# Convert to a square matrix
bray_combined <- as.matrix(bray_combined)
# Extract garden IDs from the dissimilarity matrix
garden_ids <- rownames(bray_combined)[grepl("^D\\d{2}_G", rownames(bray_combined))]
# Extract unique prefixes to group gardens by site (e.g., D04)
garden_prefixes <- unique(sub("_G.*", "", garden_ids))
# Store results
adonis_results <- list()
for (prefix in garden_prefixes) {
# Get corresponding garden rows (can be GA, GB, etc.)
garden_rows <- grep(paste0("^", prefix, "_G"), rownames(bray_combined), value = TRUE)
# Get matching dive name
dive_name <- garden_to_dive[[prefix]]
# Make sure dive exists in matrix
if (!is.null(dive_name) && dive_name %in% rownames(bray_combined)) {
# Combine garden and non-garden site
subset_ids <- c(garden_rows, dive_name)
# Subset dissimilarity matrix
dist_subset <- as.dist(bray_combined[subset_ids, subset_ids])
# Create metadata
group <- c(rep("garden", length(garden_rows)), "non-garden")
metadata <- data.frame(site = subset_ids, group = group)
# Run PERMANOVA
adonis_res <- adonis2(dist_subset ~ group, data = metadata, permutations = 999)
# Store result
adonis_results[[prefix]] <- adonis_res
}
}
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
## 'nperm' >= set of all permutations: complete enumeration.
## Set of permutations < 'minperm'. Generating entire set.
# Print all results
for (prefix in names(adonis_results)) {
cat("-----", prefix, "-----\n")
print(adonis_results[[prefix]])
cat("\n")
}
## ----- D04 -----
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 5
##
## adonis2(formula = dist_subset ~ group, data = metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.287195 0.916 10.905 0.3333
## Residual 1 0.026337 0.084
## Total 2 0.313532 1.000
##
## ----- D07 -----
## No residual component
##
## adonis2(formula = dist_subset ~ group, data = metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.10932 1
## Residual 0 0.00000 0
## Total 1 0.10932 1
##
## ----- D08 -----
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 5
##
## adonis2(formula = dist_subset ~ group, data = metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.16950 0.26875 0.3675 1
## Residual 1 0.46118 0.73125
## Total 2 0.63068 1.00000
##
## ----- D11 -----
## No residual component
##
## adonis2(formula = dist_subset ~ group, data = metadata, permutations = 999)
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.18983 1
## Residual 0 0.00000 0
## Total 1 0.18983 1
# The sample size is too low for a robust analysis here. But can do a NMDS to explore the data at least:
# Prepare distance matrix (if not already dist)
if (!inherits(bray_combined, "dist")) {
bray_dist <- as.dist(bray_combined)
} else {
bray_dist <- bray_combined
}
names(group_vector) <- rownames(bray_combined)
set.seed(42)
nmds <- metaMDS(bray_dist, k = 2, trymax = 100)
## Run 0 stress 0.05935299
## Run 1 stress 0.059353
## ... Procrustes: rmse 1.15838e-05 max resid 2.319047e-05
## ... Similar to previous best
## Run 2 stress 0.07299383
## Run 3 stress 0.05935299
## ... Procrustes: rmse 3.036709e-05 max resid 6.156312e-05
## ... Similar to previous best
## Run 4 stress 0.05935299
## ... New best solution
## ... Procrustes: rmse 5.185261e-05 max resid 0.0001098099
## ... Similar to previous best
## Run 5 stress 0.08526562
## Run 6 stress 0.08448268
## Run 7 stress 0.07299397
## Run 8 stress 0.0795385
## Run 9 stress 0.0795385
## Run 10 stress 0.08448269
## Run 11 stress 0.06324562
## Run 12 stress 0.07822073
## Run 13 stress 0.07822074
## Run 14 stress 0.07299382
## Run 15 stress 0.05935301
## ... Procrustes: rmse 8.916151e-05 max resid 0.0001884477
## ... Similar to previous best
## Run 16 stress 0.06441006
## Run 17 stress 0.06441006
## Run 18 stress 0.07299386
## Run 19 stress 0.08448268
## Run 20 stress 0.05935299
## ... New best solution
## ... Procrustes: rmse 2.351077e-05 max resid 5.041218e-05
## ... Similar to previous best
## *** Best solution repeated 1 times
scores_df <- as.data.frame(scores(nmds))
scores_df$SampleID <- rownames(scores_df)
scores_df$Group <- group_vector[scores_df$SampleID]
ggplot(scores_df, aes(x = NMDS1, y = NMDS2, color = Group)) +
geom_point(size = 3, alpha = 0.8) +
ggrepel::geom_text_repel(aes(label = SampleID), size = 3) +
stat_ellipse(level = 0.95) +
theme_minimal() +
labs(title = "NMDS: Garden vs Non-Garden Sites",
x = "NMDS1", y = "NMDS2",
color = "Site Group") +
theme(text = element_text(size = 14))
I want to plot the densitites across the forest area, showing porifera and corals and their densitities.
# Basic plot: density by lat/lon
ggplot(garden_long, aes(x = lon, y = lat)) +
geom_point(aes(size = count, color = Phylum, shape = Phylum), alpha = 0.5) +
scale_size_continuous(range = c(1,8)) +
facet_wrap(~ garden, scales = "free") +
labs(
title = "Spatial Distribution of Individuals by Phylum",
x = "Longitude", y = "Latitude",
size = "Count", color = "Phylum", shape = "Phylum"
) +
theme_minimal()
ggplot(garden_long, aes(x = lon, y = lat)) +
geom_point(aes(size = count, color = Phylum, shape = Phylum), alpha = 0.5) +
scale_size_continuous(range = c(1, 8)) +
facet_wrap(~ garden, scales = "free") +
labs(size = "Count", color = "Phylum", shape = "Phylum") +
theme_minimal()
ggplot(garden_long, aes(x = lon, y = lat)) +
geom_point(aes(size = count, color = Phylum, shape = Phylum), alpha = 0.5) +
scale_size_continuous(range = c(0, 12)) +
facet_wrap(~ garden, scales = "free") +
coord_cartesian(expand = TRUE, clip = "off") +
labs(size = "Count", color = "Phylum", shape = "Phylum") +
theme_minimal()
#Add transect line to plots
garden_ranges <- garden_long %>%
group_by(garden) %>%
summarise(start_time = min(frame_time), end_time = max(frame_time), .groups = "drop")
# For each garden, extract the matching transect points
expanded_garden_paths <- purrr::map_dfr(unique(garden_long$garden), function(g) {
range <- garden_ranges %>% filter(garden == g)
data.a %>%
filter(time >= range$start_time, time <= range$end_time) %>%
mutate(garden = g)
})
ggplot(garden_long, aes(x = lon, y = lat)) +
geom_path(data = expanded_garden_paths,
aes(x = lon, y = lat, group = garden),
color = "grey40", linewidth = 0.5, alpha = 0.2) +
geom_point(aes(size = count, color = Phylum, shape = Phylum), alpha = 0.6) +
scale_size_continuous(range = c(0, 12)) +
facet_wrap(~ garden, scales = "free") +
coord_cartesian(clip = "off") +
labs(size = "Count", color = "Phylum", shape = "Phylum") +
theme_minimal()
ggplot() +
geom_smooth(data = expanded_garden_paths,
aes(x = lon, y = lat, group = garden),
method = "loess", span = 0.5, se = FALSE, color = "grey91", linewidth = 4) +
geom_point(data = garden_long,
aes(x = lon, y = lat, size = count, color = Phylum, shape = Phylum),
alpha = 0.6) +
scale_size_continuous(range = c(1, 10)) +
facet_wrap(~ garden, scales = "free") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
# Garden communtities, find the dominant taxa:
#This selects the dominant taxa, all that make up over 25% of the total abundance per garden
dominant_morphotypes_25 <- garden_community %>%
pivot_longer(cols = -garden_id, names_to = "morphotype", values_to = "abundance") %>%
group_by(garden_id) %>%
mutate(
total_abundance = sum(abundance),
rel_abundance = abundance / total_abundance
) %>%
filter(rel_abundance > 0.25) %>% # Filter morphotypes with >30% of total
arrange(garden_id, desc(rel_abundance)) %>%
ungroup()
dominant_morphotypes_25
## # A tibble: 10 × 5
## garden_id morphotype abundance total_abundance rel_abundance
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 D04_GA PD08 36 111 0.324
## 2 D04_GA PH24 36 111 0.324
## 3 D04_GB PD08 53 133 0.398
## 4 D04_GB PH24 37 133 0.278
## 5 D07_GA PD08 62 115 0.539
## 6 D07_GA CAO21 30 115 0.261
## 7 D08_GA CA01 20 64 0.312
## 8 D08_GB P07 159 239 0.665
## 9 D08_GB PD17 70 239 0.293
## 10 D11_GA CAO39 298 361 0.825
garden_ranges <- garden_long %>%
group_by(garden_id) %>%
summarise(
start_time = first(frame_time),
end_time = last(frame_time),
.groups = "drop"
)
# Function to calculate 3D segment distances for a garden
calculate_garden_track <- function(garden) {
garden_data <- data.segmented %>%
filter(time >= garden$start_time & time <= garden$end_time)
if (nrow(garden_data) < 2) return(NULL)
# Calculate 2D haversine distance
dist2D <- distHaversine(
cbind(garden_data$lon_smooth[-nrow(garden_data)], garden_data$lat_smooth[-nrow(garden_data)]),
cbind(garden_data$lon_smooth[-1], garden_data$lat_smooth[-1])
)
# Vertical difference (depth)
delta_depth <- diff(garden_data$depth)
# 3D distance
dist3D <- sqrt(dist2D^2 + delta_depth^2)
tibble(
garden_id = garden$garden_id,
n_points = nrow(garden_data),
track_length_3D_m = sum(dist3D, na.rm = TRUE),
min_depth = min(garden_data$depth, na.rm = TRUE),
max_depth = max(garden_data$depth, na.rm = TRUE),
mean_depth = mean(garden_data$depth, na.rm = TRUE)
)
}
# Apply to each garden
garden_track_summary <- garden_ranges %>%
group_split(garden_id) %>%
map_dfr(calculate_garden_track)
garden_track_summary
## # A tibble: 6 × 6
## garden_id n_points track_length_3D_m min_depth max_depth mean_depth
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 D04_GA 137 54.9 2390. 2414. 2403.
## 2 D04_GB 235 92.0 2357. 2392. 2366.
## 3 D07_GA 189 77.5 2479. 2511. 2495.
## 4 D08_GA 188 77.0 1191. 1244. 1216.
## 5 D08_GB 415 164. 940. 948. 946.
## 6 D11_GA 1317 387. 1209. 1245. 1225.